Ejemplo n.º 1
0
    def test_predict_without_cv(self):
        # Fit with n_splits=0 (per the test name, presumably this disables
        # cross validation) and confirm predict() refuses to run when the
        # caller does not supply a lambda.
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=340561).fit(
            features, target)

        # no `lamb` argument is given, so a ValueError is expected
        self.assertRaises(ValueError, model.predict, features)
Ejemplo n.º 2
0
    def test_lambda_clip_warning(self):
        # Asking for predictions at a lambda outside the range covered by
        # lambda_path_ must emit a RuntimeWarning, on either side of the path.
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=1729).fit(features, target)

        # lambda_path_ is in decreasing order: entry 0 is the largest lambda
        # and entry -1 the smallest.
        above_path = model.lambda_path_[0] + 1
        below_path = model.lambda_path_[-1] - 1

        for out_of_range in (above_path, below_path):
            with self.assertWarns(RuntimeWarning):
                model.predict(features, lamb=out_of_range)
Ejemplo n.º 3
0
 def test_one_row_predict(self):
     # A single-row input must produce exactly one prediction, for every
     # data set in self.inputs.
     model = ElasticNet(random_state=42)
     for features, target in self.inputs:
         model.fit(features, target)
         # reshape the first sample into a (1, n_features) matrix
         first_row = features[0].reshape((1, -1))
         prediction = model.predict(first_row)
         assert prediction.shape == (1,)
Ejemplo n.º 4
0
 def test_one_row_predict_with_lambda(self):
     # Predicting one row at two lambda values must give a 2-D result with
     # one row and one column per lambda.
     model = ElasticNet(random_state=42)
     requested_lambdas = [20, 10]
     for features, target in self.inputs:
         model.fit(features, target)
         # reshape the first sample into a (1, n_features) matrix
         first_row = features[0].reshape((1, -1))
         prediction = model.predict(first_row, lamb=requested_lambdas)
         assert prediction.shape == (1, 2)
Ejemplo n.º 5
0
def glmnet_box():
    """Fit an ElasticNet with alpha=0 on each box_* target and plot the fits.

    One model object is fitted on the latitude target and then refitted on
    the longitude target.  After each fit the score on the training data is
    printed, and the predictions are passed through inverse_box_cox before
    being handed to plot_predictions alongside latitude_label or
    longitude_label.
    """
    ridge = ElasticNet(n_splits=20, scoring='r2', alpha=0)

    # Latitude: fit, report the score, then plot.
    ridge.fit(music_features, box_latitude_label)
    latitude_score = ridge.score(music_features, box_latitude_label)
    print('GLMNET ridge lattitude r2 {}'.format(latitude_score))
    latitude_fitted = inverse_box_cox(
        ridge.predict(music_features), lambda_lat, 90)
    plot_predictions(latitude_fitted, latitude_label,
                     'ridge_latitude_residual.png',
                     'residual vs fitted latitude for Ridge')

    # Longitude: refit the same estimator on the second target.
    ridge.fit(music_features, box_longitude_label)
    longitude_score = ridge.score(music_features, box_longitude_label)
    print('GLMNET ridge longitude r2 {}'.format(longitude_score))
    longitude_fitted = inverse_box_cox(
        ridge.predict(music_features), lambda_lon, 180)
    plot_predictions(longitude_fitted, longitude_label,
                     'ridge_longitude_residual.png',
                     'residual vs fitted longitude for Ridge regression')
Ejemplo n.º 6
0
def glmnet_lasso():
    """Fit an ElasticNet with alpha=1 on each box_* target and plot the fits.

    One model object is fitted on the latitude target and then refitted on
    the longitude target.  After each fit the score on the training data is
    printed, and the predictions are passed through inverse_box_cox before
    being handed to plot_predictions alongside latitude_label or
    longitude_label.
    """
    lasso = ElasticNet(n_splits=20, scoring='r2', alpha=1)

    # Latitude: fit, report the score, then plot.
    lasso.fit(music_features, box_latitude_label)
    latitude_score = lasso.score(music_features, box_latitude_label)
    print('GLMNET lasso latitude r2 {}'.format(latitude_score))
    latitude_fitted = inverse_box_cox(
        lasso.predict(music_features), lambda_lat, 90)
    plot_predictions(latitude_fitted, latitude_label,
                     'lasso_latitude_residual.png',
                     'residual vs fitted latitude for lasso regression')

    # Longitude: refit the same estimator on the second target.
    lasso.fit(music_features, box_longitude_label)
    longitude_score = lasso.score(music_features, box_longitude_label)
    print('GLMNET lasso longitude r2 {}'.format(longitude_score))
    longitude_fitted = inverse_box_cox(
        lasso.predict(music_features), lambda_lon, 180)
    plot_predictions(longitude_fitted, longitude_label,
                     'lasso_longitude_residual.png',
                     'residual vs fitted longitude for lasso regression')
Ejemplo n.º 7
0
    def test_with_single_var(self):
        # Fit on a design matrix with a single column whose target is an
        # exact multiple of that column, then check the fit quality.
        features = np.random.rand(500, 1)
        # flatten the scaled (500, 1) column into a 1-D target vector
        target = (1.3 * features).ravel()

        model = ElasticNet(random_state=449065).fit(features, target)
        predictions = model.predict(features)
        self.check_r2_score(target, predictions, 0.90)
Ejemplo n.º 8
0
    def test_coef_interpolation(self):
        # Predictions at a lambda lying between two values of the computed
        # path must differ from the predictions at both of those values.
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=1729).fit(features, target)

        # two neighbouring lambdas taken straight from the computed path
        first_lambda, second_lambda = model.lambda_path_[1], model.lambda_path_[2]

        # their midpoint is a value that is not itself on the path
        between = (first_lambda + second_lambda) / 2.0

        pred_first, pred_second, pred_between = (
            model.predict(features, lamb=value)
            for value in (first_lambda, second_lambda, between)
        )

        for on_path_prediction in (pred_first, pred_second):
            self.assertFalse(np.allclose(on_path_prediction, pred_between))
Ejemplo n.º 9
0
def l1_l2_regression(alpha):
    """Fit an ElasticNet with the given alpha on each box_* target and plot.

    One model object is fitted on the latitude target and then refitted on
    the longitude target.  After each fit the score on the training data is
    printed, and the predictions are passed through inverse_box_cox before
    being handed to plot_predictions.  The value of ``alpha`` is embedded in
    the printed message, the output file name and the plot title.
    """
    model = ElasticNet(n_splits=20, scoring='r2', alpha=alpha)

    # Latitude: fit, report the score, then plot.
    model.fit(music_features, box_latitude_label)
    latitude_score = model.score(music_features, box_latitude_label)
    print('GLMNET L1 L2 alpha {} latitude r2 {}'.format(alpha, latitude_score))
    latitude_fitted = inverse_box_cox(
        model.predict(music_features), lambda_lat, 90)
    latitude_file = 'l1_l2_latitude_residual_{}.png'.format(alpha)
    latitude_title = (
        'residual vs fitted latitude for l1_l2 \n regression alpha {}'.format(
            alpha))
    plot_predictions(latitude_fitted, latitude_label, latitude_file,
                     latitude_title)

    # Longitude: refit the same estimator on the second target.
    model.fit(music_features, box_longitude_label)
    longitude_score = model.score(music_features, box_longitude_label)
    print('GLMNET L1 L2 alpha {} longitude r2 {}'.format(alpha, longitude_score))
    longitude_fitted = inverse_box_cox(
        model.predict(music_features), lambda_lon, 180)
    longitude_file = 'l1_l2_longitude_residual_{}.png'.format(alpha)
    longitude_title = (
        'residual vs fitted longitude for l1_l2 \n regression alpha {}'.format(
            alpha))
    plot_predictions(longitude_fitted, longitude_label, longitude_file,
                     longitude_title)
Ejemplo n.º 10
0
    def test_with_defaults(self):
        # Fit a default-configured model on every data set in self.inputs
        # and run the basic sanity checks on each fit.
        model = ElasticNet(random_state=2821)
        for features, target in self.inputs:
            model = model.fit(features, target)
            sanity_check_regression(model, features)

            # the index chosen for lambda_best must not exceed the index of
            # lambda_max
            self.assertLessEqual(model.lambda_best_inx_, model.lambda_max_inx_)

            # predicting along the whole path gives one trailing entry per
            # lambda on that path
            path_predictions = model.predict(features, lamb=model.lambda_path_)
            self.assertEqual(path_predictions.shape[-1],
                             model.lambda_path_.size)
Ejemplo n.º 11
0
 def test_edge_cases(self):
     '''Edge cases in model specification.

     Covers lambdas large enough to zero out every coefficient, design
     matrices with some constant columns, and a design matrix made only of
     constant columns.
     '''
     design = np.random.random(size=(50, 10))
     true_coefs = np.random.random(size=(10,))
     response = np.dot(design, true_coefs)

     # Lambdas so large that every estimated coefficient is set to zero,
     # first a single value and then a pair.  This used to break the
     # predict method, so predict only has to run without raising.
     for huge_lambdas in ([10**5], [10**5, 2 * 10**5]):
         lasso = ElasticNet(alpha=1)
         lasso.fit(design, response, lambdas=huge_lambdas)
         lasso.predict(design)

     # Some predictors have zero variance.  This used to break the
     # computation of the maximum lambda.
     design = np.random.random(size=(50, 10))
     design[:, 2] = 0
     design[:, 8] = 1
     response = np.dot(design, true_coefs)
     enet = ElasticNet(alpha=.1)
     enet.fit(design, response)
     fitted_lambdas = enet.out_lambdas
     # extrapolate one step back from entries 1 and 2 of out_lambdas using
     # the ratio between them
     step_ratio = fitted_lambdas[1] / fitted_lambdas[2]
     max_lambda_from_fortran = fitted_lambdas[1] * step_ratio
     max_lambda_from_python = enet._max_lambda(design, response)
     self.assertAlmostEqual(max_lambda_from_fortran,
                            max_lambda_from_python,
                            places=4)

     # All predictors have zero variance.  This is an error in
     # specification and must raise.
     with self.assertRaises(ValueError):
         design = np.ones(shape=(50, 10))
         enet = ElasticNet(alpha=.1)
         enet.fit(design, response)
Ejemplo n.º 12
0
 def test_edge_cases(self):
     '''Edge cases in model specification.

     Covers lambdas large enough to zero out every coefficient, design
     matrices with some constant columns, and a design matrix made only of
     constant columns.
     '''
     design = np.random.random(size=(50, 10))
     true_coefs = np.random.random(size=(10,))
     response = np.dot(design, true_coefs)

     # Lambdas so large that every estimated coefficient is set to zero,
     # first a single value and then a pair.  This used to break the
     # predict method, so predict only has to run without raising.
     for huge_lambdas in ([10**5], [10**5, 2 * 10**5]):
         lasso = ElasticNet(alpha=1)
         lasso.fit(design, response, lambdas=huge_lambdas)
         lasso.predict(design)

     # Some predictors have zero variance.  This used to break the
     # computation of the maximum lambda.
     design = np.random.random(size=(50, 10))
     design[:, 2] = 0
     design[:, 8] = 1
     response = np.dot(design, true_coefs)
     enet = ElasticNet(alpha=.1)
     enet.fit(design, response)
     fitted_lambdas = enet.out_lambdas
     # extrapolate one step back from entries 1 and 2 of out_lambdas using
     # the ratio between them
     step_ratio = fitted_lambdas[1] / fitted_lambdas[2]
     max_lambda_from_fortran = fitted_lambdas[1] * step_ratio
     max_lambda_from_python = enet._max_lambda(design, response)
     self.assertAlmostEqual(max_lambda_from_fortran,
                            max_lambda_from_python,
                            places=4)

     # All predictors have zero variance.  This is an error in
     # specification and must raise.
     with self.assertRaises(ValueError):
         design = np.ones(shape=(50, 10))
         enet = ElasticNet(alpha=.1)
         enet.fit(design, response)
Ejemplo n.º 13
0
 def test_unregularized_with_weights(self):
     '''Test that fitting an unregularized model (lambda=0) gives expected
     results when sample weights are used.

     The target is an exact linear function of the features, so both the
     predictions and the fitted coefficients are compared against the
     generating values, for a dense and a sparse copy of the same matrix.
     '''
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     target = np.dot(dense, true_coefs)
     sample_weights = np.random.uniform(size=(5000,))

     # every alpha is tried against both matrix representations
     cases = [(alpha, design)
              for alpha in [0, .5, 1]
              for design in (dense, sparse)]
     for alpha, design in cases:
         enet = ElasticNet(alpha=alpha)
         enet.fit(design, target, lambdas=[0], weights=sample_weights)
         predictions = enet.predict(design).ravel()
         self.assertTrue(np.allclose(predictions, target, atol=.01))
         fitted_coefs = enet._coefficients.ravel()
         self.assertTrue(np.allclose(fitted_coefs, true_coefs, atol=.02))
Ejemplo n.º 14
0
 def test_unregularized_with_weights(self):
     '''Test that fitting an unregularized model (lambda=0) gives expected
     results when sample weights are used.

     The target is an exact linear function of the features, so both the
     predictions and the fitted coefficients are compared against the
     generating values, for a dense and a sparse copy of the same matrix.
     '''
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     target = np.dot(dense, true_coefs)
     sample_weights = np.random.uniform(size=(5000,))

     # every alpha is tried against both matrix representations
     cases = [(alpha, design)
              for alpha in [0, .5, 1]
              for design in (dense, sparse)]
     for alpha, design in cases:
         enet = ElasticNet(alpha=alpha)
         enet.fit(design, target, lambdas=[0], weights=sample_weights)
         predictions = enet.predict(design).ravel()
         self.assertTrue(np.allclose(predictions, target, atol=.01))
         fitted_coefs = enet._coefficients.ravel()
         self.assertTrue(np.allclose(fitted_coefs, true_coefs, atol=.02))
Ejemplo n.º 15
0
 def test_unregularized_models(self):
     '''Test that fitting an unregularized model (lambda=0) gives
     expected results for both dense and sparse model matrices.

     The target is an exact linear function of the features, so an
     unregularized fit should reproduce it: the predictions must match the
     target and the fitted coefficients must match the generating ones,
     each within a small absolute tolerance.
     '''
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     target = np.dot(dense, true_coefs)

     # every alpha is tried against both matrix representations
     cases = [(alpha, design)
              for alpha in [0, .5, 1]
              for design in (dense, sparse)]
     for alpha, design in cases:
         enet = ElasticNet(alpha=alpha)
         enet.fit(design, target, lambdas=[0])
         predictions = enet.predict(design).ravel()
         self.assertTrue(np.allclose(predictions, target, atol=.01))
         fitted_coefs = enet._coefficients.ravel()
         self.assertTrue(np.allclose(fitted_coefs, true_coefs, atol=.02))
Ejemplo n.º 16
0
 def test_unregularized_models(self):
     '''Test that fitting an unregularized model (lambda=0) gives
     expected results for both dense and sparse model matrices.

     The target is an exact linear function of the features, so an
     unregularized fit should reproduce it: the predictions must match the
     target and the fitted coefficients must match the generating ones,
     each within a small absolute tolerance.
     '''
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     target = np.dot(dense, true_coefs)

     # every alpha is tried against both matrix representations
     cases = [(alpha, design)
              for alpha in [0, .5, 1]
              for design in (dense, sparse)]
     for alpha, design in cases:
         enet = ElasticNet(alpha=alpha)
         enet.fit(design, target, lambdas=[0])
         predictions = enet.predict(design).ravel()
         self.assertTrue(np.allclose(predictions, target, atol=.01))
         fitted_coefs = enet._coefficients.ravel()
         self.assertTrue(np.allclose(fitted_coefs, true_coefs, atol=.02))
    # (ii) Partially missing features (start with least missing one)
    cn_X_full_num = ['age_days']
    cn_X_full_cat = list(np.setdiff1d(cn_X_full,cn_X_full_num))

    OHE = OneHotEncoder(handle_unknown='ignore')
    scaler = StandardScaler()
    transformer = ColumnTransformer([('cat_cols', OHE, list(Xtrain.columns.isin(cn_X_full_cat))),
                                     ('num_cols', scaler, list(Xtrain.columns.isin(cn_X_full_num)))])
    enc_X = transformer.fit(Xtrain.iloc[idx_train])

    for cn in cn_partial:
        print('cn: %s' % cn)
        y_train, y_test = df_X[cn].iloc[idx_train].values, df_X[cn].iloc[idx_test].values
        mdl_lasso = ElasticNet(alpha=1, n_lambda=50, n_splits=5, random_state=1,verbose=False,n_jobs=5)
        mdl_lasso.fit(X=enc_X.transform(Xtrain.iloc[idx_train]), y=y_train)
        y_pred = mdl_lasso.predict(enc_X.transform(Xtrain.iloc[idx_test]))
        r2_pred = r2_score(y_test, y_pred)
        print('R2-score: %0.3f' % r2_pred)
        dat_Xmap[cn+'2'] = np.where(dat_Xmap[cn].isnull(),mdl_lasso.predict(enc_X.transform(Xtarget)),dat_Xmap[cn])
    
    # Assign
    dat_Xmap = dat_Xmap.assign(height=lambda x: np.where(x.height.isnull(), x.height2, x.height),
                               weight=lambda x: np.where(x.weight.isnull(), x.weight2, x.weight))
    dat_Xmap.drop(columns = ['height2', 'weight2'], inplace=True)

    # (iii) Impute the "fully" missing features
    cn_impute_new = list(np.setdiff1d(cn_impute,['workrvu','ethnicity_hispanic']))
    cn_X_full_new = list(cn_X_full) + ['height','weight','workrvu']

    # Make sure columns line up for preprocessor
    Xtarget_new = dat_Xmap[cn_X_full].copy()
Ejemplo n.º 18
0
             linestyle='None',
             marker='o',
             markersize=5,
             yerr=ridge_reg.cv_standard_error_,
             ecolor='lightgrey',
             capsize=4)

# Mark the two selected lambdas of the ridge fit on the current axes: a dashed
# vertical line at log(lambda) plus a centred text label placed at 95% of the
# current upper y-limit.
for ref, txt in zip([ridge_reg.lambda_best_, ridge_reg.lambda_max_],
                    ['Lambda best', 'Lambda max']):
    plt.axvline(x=np.log(ref), linestyle='dashed', color='lightgrey')
    plt.text(np.log(ref), .95 * plt.gca().get_ylim()[1], txt, ha='center')

plt.xlabel('log(Lambda)')
plt.ylabel('Mean-Squared Error')

# Predict on the held-out set at lambda_max_ (not lambda_best_).
y_pred = ridge_reg.predict(X_test, lamb=ridge_reg.lambda_max_)

# NOTE(review): if this is sklearn's mean_squared_error, its documented
# argument order is (y_true, y_pred).  The arguments are swapped here, which
# does not change a plain MSE because the metric is symmetric - confirm.
ridge_err = mean_squared_error(y_pred, y_test)

################# Lasso Regression #####################

# Same procedure with alpha=1, reusing the lambda grid `grid` defined
# outside this excerpt.
lasso_reg = ElasticNet(alpha=1, scoring="mean_squared_error", lambda_path=grid)

lasso_reg.fit(X_train, y_train)

# The two bare expressions below have no effect when run as a script;
# presumably left over from interactive/notebook inspection.
lasso_reg.lambda_best_

lasso_reg.lambda_max_

plt.figure(figsize=(10, 7))
plt.errorbar(np.log(lasso_reg.lambda_path_),
Ejemplo n.º 19
0
from sklearn.datasets import make_regression

# Demo script: fit an elastic net on synthetic regression data, print the
# fitted model and compare a few predictions with the actual targets.
display_bar = '-' * 70

# Synthetic regression problem generated with make_regression.
X, y = make_regression(
    n_samples=5000,
    n_features=100,
    n_informative=30,
    effective_rank=40,
    noise=.1,
)

# print is called with a single parenthesised argument throughout, so the
# script parses under Python 3 and still prints the same text under Python 2.
print(display_bar)
print("Fit an elastic net on some fake data")
print(display_bar)

enet = ElasticNet(alpha=.025)
enet.fit(X, y)

print(enet)

print(display_bar)
print("Predictions vs. actuals for the last elastic net model:")
print(display_bar)

preds = enet.predict(X)
print(y[:10])
# preds is indexed as a 2-D array; show the first ten rows of its last column
print(preds[:10, -1])

enet.plot_paths()
Ejemplo n.º 20
0
def get_glmnet_sig(sig_df, ret_sr, look_back=12, sample_decay=1.0,
                   num_sig_vec=(5,), alpha=0.5, signs_vec=None):
    """Build rolling elastic-net combined signals, one block per rebalance date.

    For every rebalance date after the first ``look_back`` ones, a model is
    fitted on the rows belonging to the previous ``look_back`` dates and then
    used to predict the rows of the current date.  Each prediction vector is
    scaled so that its absolute values sum to one.

    Parameters
    ----------
    sig_df : pandas.DataFrame
        Signal columns; the distinct index values are used as the rebalance
        dates.  The frame is copied and never modified.
    ret_sr : pandas.Series
        Target values, assigned as column ``'y'`` of the copied frame (so it
        must align with ``sig_df``'s index).
    look_back : int
        Number of preceding rebalance dates used for training; must be >= 1.
    sample_decay : float
        Exponential decay rate of the sample weights.  Rows of the most
        recent training date get weight 1, rows ``k`` dates older get
        ``exp(-sample_decay * k)``.
    num_sig_vec : iterable of int
        Values passed one by one to ``get_lambda``; each produces one output
        column named ``str(num_sig)``.
    alpha : float
        Forwarded to ``ElasticNet``.
    signs_vec : optional
        Forwarded unchanged to ``model.fit``.

    Returns
    -------
    tuple
        ``(comb_sig_df, sel_sig_names_vec)``: the per-date prediction frames
        concatenated row-wise, and for each date a list (one entry per
        ``num_sig``) of selected column names padded with ``'NA'`` up to
        ``num_sig`` entries.

    Raises
    ------
    ValueError
        If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        # With look_back == 0 the window end would wrap around to the last
        # date (index -1) and silently train on future data.
        raise ValueError('look_back must be at least 1')

    # Work on a private copy so the caller's frame never gains a 'y' column.
    data = sig_df.copy()
    data['y'] = ret_sr

    rebalance_dates = data.index.unique().sort_values()

    frames = []
    sel_sig_names_vec = []
    print('inside')

    for ind in range(look_back, rebalance_dates.shape[0]):
        r_d = rebalance_dates[ind]
        print(r_d)

        # Training window: the look_back dates immediately before r_d.
        train_dates = rebalance_dates[ind - look_back:ind]
        in_window = ((data.index >= train_dates[0])
                     & (data.index <= train_dates[-1]))
        train_data = data[in_window]
        test_data = data[data.index == r_d]

        train_x = train_data.drop(['y'], axis=1)
        train_y = train_data['y']
        test_x = test_data.drop(['y'], axis=1)

        # Weight every training row by the age of its own date.  Deriving the
        # weights from the row's date (instead of assuming look_back blocks of
        # equal size) covers every row of a period, including the last one,
        # and stays correct when the number of rows per date varies.
        period_pos = train_dates.get_indexer(train_data.index)
        sample_weights = np.exp(-sample_decay * (look_back - 1 - period_pos))

        model = ElasticNet(alpha=alpha, fit_intercept=True, n_lambda=1000,
                           tol=1e-8)
        model.fit(train_x, train_y, sample_weight=sample_weights,
                  signs_vec=signs_vec)

        columns = {}
        sel_sig_names = []

        for num_sig in num_sig_vec:
            # get_lambda returns the value handed to predict and an indexer
            # into the feature columns.
            s, w_ind = get_lambda(model, num_sig)
            this_sel_sig = test_x.columns[w_ind].values
            shortfall = num_sig - this_sel_sig.shape[0]
            if shortfall > 0:
                # pad so every entry lists exactly num_sig names
                this_sel_sig = np.append(this_sel_sig, ['NA'] * shortfall)

            this_sig = pd.Series(model.predict(test_x, s), index=test_x.index)
            # scale to unit gross exposure (sum of absolute values == 1)
            this_sig = this_sig / np.sum(np.abs(this_sig))

            # store raw values: the test index repeats the same date for
            # every row, so label-based alignment must be avoided
            columns[str(num_sig)] = this_sig.values
            sel_sig_names.append(this_sel_sig)

        sel_sig_names_vec.append(sel_sig_names)
        frames.append(pd.DataFrame(columns, index=test_x.index))

    # DataFrame.append was removed in pandas 2.0; concatenate once at the end.
    comb_sig_df = pd.concat(frames) if frames else pd.DataFrame()

    return comb_sig_df, sel_sig_names_vec
Ejemplo n.º 21
0
 def test_cv_scoring(self):
     # Fit once per scoring method listed in self.scoring and run the
     # check_r2_score helper (threshold 0.90) on each fit.
     features, target = self.inputs[0]
     for scoring_name in self.scoring:
         model = ElasticNet(scoring=scoring_name, random_state=1729)
         model = model.fit(features, target)
         predictions = model.predict(features)
         self.check_r2_score(target, predictions, 0.90, scoring=scoring_name)
Ejemplo n.º 22
0
 def test_alphas(self):
     # Fit once per alpha listed in self.alphas and run the check_r2_score
     # helper (threshold 0.90) on each fit.
     features, target = self.inputs[0]
     for mixing in self.alphas:
         model = ElasticNet(alpha=mixing, random_state=2465)
         model = model.fit(features, target)
         predictions = model.predict(features)
         self.check_r2_score(target, predictions, 0.90, alpha=mixing)
Ejemplo n.º 23
0
from sklearn.datasets import make_regression

# Demo script: fit an elastic net on synthetic regression data, print the
# fitted model and compare a few predictions with the actual targets.
display_bar = '-' * 70

# Synthetic regression problem generated with make_regression.
X, y = make_regression(
    n_samples=5000,
    n_features=100,
    n_informative=30,
    effective_rank=40,
    noise=.1,
)

# print is called with a single parenthesised argument throughout, so the
# script parses under Python 3 and still prints the same text under Python 2.
print(display_bar)
print("Fit an elastic net on some fake data")
print(display_bar)

enet = ElasticNet(alpha=.025)
enet.fit(X, y)

print(enet)

print(display_bar)
print("Predictions vs. actuals for the last elastic net model:")
print(display_bar)

preds = enet.predict(X)
print(y[:10])
# preds is indexed as a 2-D array; show the first ten rows of its last column
print(preds[:10, -1])

enet.plot_paths()