Example #1
0
 def test_max_features(self):
     """Fit with a cap on selected features and check the cap is honoured.

     Uses the fourth fixture in ``self.inputs`` and verifies that the number
     of non-zero fitted coefficients does not exceed ``max_features``.
     """
     features, target = self.inputs[3]
     feature_cap = 5
     model = ElasticNet(n_splits=3,
                        random_state=42,
                        max_features=feature_cap)
     model = model.fit(features, target)
     n_selected = np.count_nonzero(model.coef_)
     self.assertTrue(n_selected <= feature_cap)
Example #2
0
    def test_ridge_models(self):
        """Check that a pure ridge (alpha=0) model shrinks uniformly.

        The model is fit on a dense and on a sparse (CSC) copy of the same
        random design matrix, for ten penalties evenly spaced over [0, 1].
        The response is an exact linear function of the predictors, so for
        every fit the element-wise ratio of fitted to true coefficients,
        scaled by its largest entry, is expected to be within 0.05 of one.

        This test draws more samples than the others so that the predictors
        are close to uncorrelated; otherwise the effect being measured does
        not occur.
        """
        dense = np.random.random(size=(50000, 3))
        sparse = csc_matrix(dense)
        true_coefs = np.random.random(size=(3,))
        penalties = np.linspace(0, 1, 10)
        for design in (dense, sparse):
            for penalty in penalties:
                # The response is always built from the dense matrix.
                response = np.dot(dense, true_coefs)
                model = ElasticNet(alpha=0)
                model.fit(design, response, lambdas=[penalty])
                shrinkage = model._coefficients.ravel() / true_coefs
                scaled = shrinkage / np.max(shrinkage)
                self.assertTrue(np.allclose(scaled, 1, atol=.05))
    def _notify(self, trainer):
        """Fit the glmnet path once and record metrics for every lambda.

        On the first call the training data held by ``trainer`` is moved to
        NumPy, an ElasticNet path without intercept or cross-validation is
        fit on 200 log-spaced penalties between 1e-9 and 10, and
        ``append_performance_metrics`` is invoked for each coefficient
        vector on the path. Later calls do nothing.
        """
        if self.computed:
            return

        features = trainer.X.cpu().detach().numpy()
        targets = trainer.y.cpu().detach().numpy()

        log_penalties = np.linspace(start=np.log(10**(-9)),
                                    stop=np.log(10),
                                    num=200)
        path_model = ElasticNet(n_splits=0,
                                fit_intercept=False,
                                lambda_path=np.exp(log_penalties))
        path_model.fit(features, targets.squeeze())

        self.lambdas = path_model.lambda_path_
        # Put lambda on the first axis so that it plays the same role as
        # time does for gradient descent iterates: (n_lambda, n_params).
        self.coefs = np.swapaxes(path_model.coef_path_, 0, 1)
        for w_lambda in self.coefs:
            self.append_performance_metrics(w_lambda, trainer)

        if self.store_path is False:
            # Dummy array so that _aggregate_numeric_results works.
            self.coefs = [np.array([[-1], [-1]])]

        self.computed = True
Example #4
0
    def test_ridge_models(self):
        """Verify uniform shrinkage of a pure ridge (alpha=0) fit.

        Data are generated from an exact linear model on a random design
        matrix with 50000 rows, which keeps the predictors nearly
        uncorrelated (the effect under test does not occur otherwise).
        For dense and CSC-sparse inputs and for each of ten penalties in
        [0, 1], the ratios of fitted to true coefficients are normalised by
        their maximum and must all lie within 0.05 of one.
        """
        x_dense = np.random.random(size=(50000, 3))
        x_sparse = csc_matrix(x_dense)
        beta = np.random.random(size=(3, ))
        for x_variant in (x_dense, x_sparse):
            for lam in np.linspace(0, 1, 10):
                target = np.dot(x_dense, beta)
                ridge = ElasticNet(alpha=0)
                ridge.fit(x_variant, target, lambdas=[lam])
                coef_ratios = ridge._coefficients.ravel() / beta
                relative = coef_ratios / np.max(coef_ratios)
                is_uniform = np.allclose(relative, 1, atol=.05)
                self.assertTrue(is_uniform)
Example #5
0
    def TL(self, x, y, pen_bic, pen_gic):
        """Select a thresholded coefficient vector from an ElasticNet path.

        First the path member minimising ``RSS + pen_bic * k`` is chosen
        (``k`` is the number of non-zero coefficients).  Then each positive
        coefficient of that member is tried as a threshold: coefficients
        below the threshold are set to zero and the candidate minimising
        ``RSS + pen_gic * k`` is kept.

        Parameters
        ----------
        x : ndarray
            Predictors; a 1-D array is treated as a single column.
        y : ndarray
            Response.
        pen_bic, pen_gic : float
            Per-coefficient penalties for the two selection steps.

        Returns
        -------
        tuple
            ``(beta_gic, intercept)``; ``beta_gic`` is all zeros when the
            selected path member has no positive coefficient.
        """
        if len(x.shape) < 2:
            # ndarray.reshape returns a new array; the result must be kept,
            # otherwise x silently stays one-dimensional.
            x = x.reshape((x.shape[0], 1))

        m = ElasticNet().fit(x, y)
        betas = m.coef_path_
        intercepts = m.intercept_path_

        # Step 1: pick the path member with the smallest penalised RSS.
        BIC = np.inf
        for i in range(betas.shape[1]):
            RSS = np.sum((y - np.matmul(x, betas[:, i]) - intercepts[i])**2)
            k = np.sum(betas[:, i] != 0)
            BIC_new = RSS + pen_bic * k
            if BIC_new < BIC:
                BIC = BIC_new
                beta_bic = betas[:, i]
                intercept = intercepts[i]

        # Step 2: hard-threshold the selected coefficients.
        # NOTE(review): only positive coefficients are used as thresholds and
        # every coefficient below the threshold (including negative ones) is
        # zeroed -- confirm that coefficients are expected to be non-negative.
        thresholds = np.sort(beta_bic[beta_bic > 0])
        beta_gic = np.zeros_like(beta_bic)
        GIC = np.inf
        for delta in thresholds:
            beta_thres = beta_bic.copy()
            beta_thres[beta_thres < delta] = 0
            RSS = np.sum((y - np.matmul(x, beta_thres) - intercept)**2)
            k = np.sum(beta_thres != 0)
            GIC_new = RSS + pen_gic * k
            if GIC_new < GIC:
                GIC = GIC_new
                beta_gic = beta_thres

        return beta_gic, intercept
Example #6
0
 def test_random_state_cv(self):
     """Check that the fitted model's CV splitter carries the random state.

     After fitting on the first fixture, ``m._cv.random_state`` must equal
     the ``random_state`` passed to the constructor.
     """
     random_state = 133
     m = ElasticNet(random_state=random_state)
     x, y = self.inputs[0]
     m.fit(x, y)
     # The former ``print(dir(m._cv))`` was debugging output and only
     # cluttered the test log, so it is no longer emitted.
     assert m._cv.random_state == random_state
Example #7
0
def ode_integratedlasso_rank_vars(D,
                                  times,
                                  target,
                                  env=None,
                                  silent=True,
                                  interactions=True,
                                  rm_target=True):
    """Rank predictors by when they enter an integrated-lasso path.

    Each row of ``D`` holds ``d`` variables observed at ``L = len(times)``
    time points, stored as ``d`` consecutive chunks of length ``L``.  The
    response is the first difference of the target variable; each predictor
    is the trapezoidal integral of a variable over the matching time step.
    An ElasticNet path is fit and terms are ranked by the index of the
    first lambda at which their coefficient becomes non-zero.  Terms that
    never enter are ranked last.

    Parameters
    ----------
    D : ndarray
        Data matrix with ``d * L`` columns.
    times : sequence
        Observation times of length ``L``.
    target : int
        Index of the target variable.
    env, silent
        Not used by this function; kept for interface compatibility.
    interactions : bool
        If true, products of every pair ``(i, j)`` with ``j <= i`` are added
        as predictors and the ranking contains lists of variable indices.
    rm_target : bool
        If true, entries equal to ``target`` are removed from the ranking.

    Returns
    -------
    dict
        ``{'ranking': ..., 'coef': fit.coef_}``.
    """
    L = len(times)
    d = D.shape[1] // L
    n = D.shape[0]

    Xint = np.zeros([(L - 1) * n, d])
    deltaY = np.zeros([(L - 1) * n])
    dt = np.diff(times)
    for i in range(n):
        rows = slice(i * (L - 1), (i + 1) * (L - 1))
        deltaY[rows] = np.diff(D[i, target * L:(target + 1) * L])
        for j in range(d):
            tmp = D[i, j * L:(j + 1) * L]
            # Trapezoidal rule on every time step.
            Xint[rows, j] = (tmp[:(L - 1)] + tmp[1:]) / 2 * dt

    # Remove rows containing NaN in the response or in any predictor.
    na_ind = np.isnan(deltaY) | np.isnan(Xint).any(axis=1)
    deltaY = deltaY[~na_ind]
    Xint = Xint[~na_ind, ]

    # Perform lasso
    if interactions:
        terms = [[i] for i in range(d)]
        terms += [[i, j] for i in range(d) for j in range(i + 1)]
        # Object array of index lists; filled element-wise so NumPy does not
        # try to broadcast the lists (np.object no longer exists in NumPy).
        var_names = np.empty(len(terms), dtype=object)
        for pos, term in enumerate(terms):
            var_names[pos] = term
        # Size from Xint so the shapes agree after NaN rows were dropped.
        Xint_interactions = np.zeros([Xint.shape[0], len(var_names)])
        Xint_interactions[:, :d] = Xint
        for i in range(d, len(var_names)):
            first, second = var_names[i]
            Xint_interactions[:, i] = Xint[:, first] * Xint[:, second]
        fit = ElasticNet().fit(Xint_interactions, deltaY)
        sel_matrix = (np.abs(fit.coef_path_) > 1e-7)
    else:
        fit = ElasticNet().fit(Xint, deltaY)
        sel_matrix = fit.coef_path_ != 0

    # Index of the first lambda at which each term is selected.  argmax (not
    # max) is needed here: max only says *whether* a term was ever selected.
    first_entrance = sel_matrix.argmax(axis=1).astype(float)
    # Terms that are never selected are pushed to the end of the ranking.
    first_entrance[~sel_matrix.any(axis=1)] = np.inf
    ranking = first_entrance.argsort(kind="stable")
    if interactions:
        ranking = var_names[ranking]
    if rm_target:
        # NOTE(review): with interactions the entries are lists, so this
        # comparison against an int presumably never matches -- confirm.
        ranking = ranking[ranking != target]

    return ({'ranking': ranking, 'coef': fit.coef_})
Example #8
0
 def test_validate_weights(self):
     """Exercise ``_validate_weights`` with invalid and valid weights.

     Three weight vectors must be rejected with ``ValueError``: one that is
     too short (49 entries for 50 samples), one whose length matches the
     wrong dimension of X (10), and one containing a negative entry.  A
     vector of 50 ones must be accepted.
     """
     design = np.random.random(size=(50, 10))
     coefs = np.random.random(size=(10, ))
     response = np.dot(design, coefs)
     model = ElasticNet(alpha=.5)

     too_short = np.ones(shape=(49, ))
     wrong_axis = np.ones(shape=(10, ))
     has_negative = np.ones(shape=(50, ))
     has_negative[25] = -1
     for bad_weights in (too_short, wrong_axis, has_negative):
         with self.assertRaises(ValueError):
             model._validate_weights(design, response, weights=bad_weights)

     # Correct length and all entries non-negative: must not raise.
     good_weights = np.ones(shape=(50, ))
     model._validate_weights(design, response, weights=good_weights)
Example #9
0
 def test_one_row_predict(self):
     """Predicting for a single observation must yield exactly one value."""
     model = ElasticNet(random_state=42)
     for features, target in self.inputs:
         model.fit(features, target)
         single_row = features[0].reshape((1, -1))
         prediction = model.predict(single_row)
         assert prediction.shape == (1,)
class knockoff_lasso(knockoff_net):
    """Performs the knockoff technique with a lasso (alpha=1) fit."""

    def fit(self, X_lrg=None):
        """Generate knockoffs, fit the regression and run the FDR steps.

        When ``X_lrg`` is given it is used as the augmented design matrix;
        otherwise the knockoffs are generated according to
        ``self.knockoff_type`` ('original' or 'binary') by the methods
        inherited from ``knockoff_net``.
        """
        if X_lrg is not None:
            self.X_lrg = X_lrg
        elif self.knockoff_type == 'original':
            self._original_knockoff()
        elif self.knockoff_type == 'binary':
            self._binary_knockoff()

        # Set up and fit the glmnet object.
        lambda_fraction = min(.000001, .01 / (self.p**2))
        self.elasticnet = ElasticNet(alpha=1,
                                     n_lambdas=self.p * 20,
                                     frac_lg_lambda=lambda_fraction)
        self.elasticnet.fit(self.X_lrg,
                            self.y,
                            normalize=False,
                            include_intercept=self.intercept)

        # Copy the fitted values out of the glmnet object.
        net = self.elasticnet
        self.lambdas = net.out_lambdas
        self.var_index_ent = np.sort(net._indices)
        self.coef_matrix = np.zeros((2 * self.p, net.n_lambdas))
        self.coef_matrix[self.var_index_ent] = (
            net._comp_coef.squeeze()[net._indices])

        # Flag the variables that appear among the fitted indices.
        self.var_entered = np.zeros(2 * self.p, dtype=bool)
        self.var_entered[self.var_index_ent] = True

        # FDR calculations inherited from knockoff_net, in this order.
        for step in (self._get_z, self._get_w, self._get_T, self._get_S):
            step()
Example #11
0
 def test_one_row_predict_with_lambda(self):
     """One row predicted at two lambdas must give output of shape (1, 2)."""
     model = ElasticNet(random_state=42)
     requested_lambdas = [20, 10]
     for features, target in self.inputs:
         model.fit(features, target)
         single_row = features[0].reshape((1, -1))
         prediction = model.predict(single_row, lamb=requested_lambdas)
         assert prediction.shape == (1, 2)
Example #12
0
    def test_with_single_var(self):
        """Fit on a single predictor with an exact linear response.

        The response is 1.3 times the only column; the in-sample
        predictions are passed to ``check_r2_score`` with threshold 0.90.
        """
        features = np.random.rand(500, 1)
        target = (1.3 * features).ravel()

        model = ElasticNet(random_state=449065).fit(features, target)
        predictions = model.predict(features)
        self.check_r2_score(target, predictions, 0.90)
Example #13
0
 def test_coef_limits(self):
     """Fitted coefficients must respect the box constraints [-1, 0].

     The lower limit is given per feature, the upper limit as a scalar.
     """
     features, target = self.inputs[0]
     n_features = features.shape[1]
     model = ElasticNet(lower_limits=np.repeat(-1, n_features),
                        upper_limits=0,
                        random_state=5934,
                        alpha=0)
     model = model.fit(features, target)
     fitted = model.coef_
     assert np.all(fitted >= -1)
     assert np.all(fitted <= 0)
Example #14
0
    def test_predict_without_cv(self):
        """Without CV (n_splits=0), predict requires an explicit lambda."""
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=340561)
        model = model.fit(features, target)

        # No lambda was selected during fit, so predicting without one
        # must be refused.
        with self.assertRaises(ValueError):
            model.predict(features)
Example #15
0
    def test_with_pandas_df(self):
        """Fit from a pandas DataFrame / Series instead of NumPy arrays."""
        features, target = make_regression(random_state=561)
        frame = pd.DataFrame(features)
        frame['y'] = target

        model = ElasticNet(n_folds=3, random_state=123)
        predictors = frame.drop(['y'], axis=1)
        model = model.fit(predictors, frame.y)
        sanity_check_regression(model, features)
    def test_with_pandas_df(self):
        """Check that the model accepts pandas containers as input.

        The predictors are passed as a DataFrame and the response as a
        Series; the fitted model is then sanity-checked on the raw array.
        """
        raw_x, raw_y = make_regression(random_state=561)
        data = pd.DataFrame(raw_x)
        data['y'] = raw_y

        estimator = ElasticNet(n_splits=3, random_state=123)
        estimator = estimator.fit(data.drop(['y'], axis=1), data.y)
        sanity_check_regression(estimator, raw_x)
Example #17
0
    def test_with_no_predictor_variance(self):
        """Fitting on a constant predictor must fail with a clear error.

        The single column of ``x`` is all ones, so it has zero variance.
        ``fit`` is expected to raise ``ValueError`` carrying the glmnet
        message for error 7777.
        """
        x = np.ones((500, 1))
        y = np.random.rand(500)

        m = ElasticNet(random_state=561)
        msg = "All predictors have zero variance (glmnet error no. 7777)."
        # ``assertRaises(..., msg=...)`` only sets the failure message; it
        # does not inspect the exception.  Capture the exception and check
        # its text explicitly instead.
        with self.assertRaises(ValueError) as caught:
            m.fit(x, y)
        self.assertIn(msg, str(caught.exception))
Example #18
0
 def test_coef_limits(self):
     """Fitted coefficients must respect the box constraints [0, 1].

     The lower limit is given as a scalar, the upper limit per feature.
     """
     x, y = self.inputs[0]
     lower_limits = 0
     upper_limits = np.repeat(1, x.shape[1])
     m = ElasticNet(lower_limits=lower_limits,
                    upper_limits=upper_limits,
                    random_state=5934)
     m = m.fit(x, y)
     # The comparison has to happen element-wise *inside* np.all;
     # ``np.all(coef) >= 0`` compares a single bool and is always true.
     assert np.all(m.coef_ >= 0)
     assert np.all(m.coef_ <= 1)
Example #19
0
 def test_validate_weights(self):
     """Check which sample-weight vectors ``_validate_weights`` accepts.

     For a 50 x 10 design matrix, weights of length 49, weights of length
     10 and weights with a negative entry must each raise ``ValueError``;
     50 non-negative weights must pass.
     """
     features = np.random.random(size=(50, 10))
     beta = np.random.random(size=(10,))
     target = np.dot(features, beta)
     net = ElasticNet(alpha=.5)

     # Invalid: sample weight vector that is too short.
     short_weights = np.ones(shape=(49,))
     with self.assertRaises(ValueError):
         net._validate_weights(features, target, weights=short_weights)

     # Invalid: length matches the wrong dimension of X.
     column_sized = np.ones(shape=(10,))
     with self.assertRaises(ValueError):
         net._validate_weights(features, target, weights=column_sized)

     # Invalid: contains a negative entry.
     negative_weights = np.ones(shape=(50,))
     negative_weights[25] = -1
     with self.assertRaises(ValueError):
         net._validate_weights(features, target, weights=negative_weights)

     # Valid: correct dimension, all entries non-negative.
     net._validate_weights(features, target, weights=np.ones(shape=(50,)))
Example #20
0
 def test_n_splits(self):
     """Fit with each configured ``n_splits`` value.

     Values of 1 or 2 must be rejected with a ``ValueError`` mentioning
     that at least 3 splits are required; every other value in
     ``self.n_splits`` must fit and pass the regression sanity check.
     """
     x, y = self.inputs[0]
     for n in self.n_splits:
         m = ElasticNet(n_splits=n, random_state=6601)
         if 0 < n < 3:
             # assertRaisesRegexp is a deprecated alias that was removed in
             # Python 3.12; assertRaisesRegex is the supported name.
             with self.assertRaisesRegex(ValueError,
                                         "n_splits must be at least 3"):
                 m = m.fit(x, y)
         else:
             m = m.fit(x, y)
             sanity_check_regression(m, x)
Example #21
0
def test_fit_cv_glmnet_comparison():
    """Compare the CV results of ``Elnet`` with glmnet's ``ElasticNet``.

    Both models are fit with three splits and the same random state on the
    same simulated linear data; their ``lambda_max_`` must agree and
    ``Elnet.lambda_1se_`` must match the first entry of
    ``ElasticNet.lambda_best_``.
    """
    rng = np.random.default_rng(SEED)
    noise = rng.normal(loc=0, scale=1, size=100)
    design = rng.normal(loc=5, scale=2, size=(100, 4))
    response = design.dot(np.array([1, -2, 0.5, 1])) + noise

    candidate = Elnet(n_splits=3, random_state=182, scoring="r2")
    candidate.fit(design, response)
    reference = ElasticNet(n_splits=3, random_state=182)
    reference.fit(design, response)

    np.testing.assert_almost_equal(candidate.lambda_max_,
                                   reference.lambda_max_)
    np.testing.assert_almost_equal(candidate.lambda_1se_,
                                   reference.lambda_best_[0])
Example #22
0
    def test_with_defaults(self):
        """Fit with default settings on every fixture and sanity-check.

        Besides the generic regression check this verifies that the index
        of the selected lambda does not exceed the index of ``lambda_max``
        and that predicting along the full path yields one column per
        lambda.
        """
        model = ElasticNet(random_state=2821)
        for features, target in self.inputs:
            model = model.fit(features, target)
            sanity_check_regression(model, features)

            # Selection of lambda_best.
            self.assertTrue(model.lambda_best_inx_ <= model.lambda_max_inx_)

            # Full-path prediction.
            full_path = model.lambda_path_
            predictions = model.predict(features, lamb=full_path)
            self.assertEqual(predictions.shape[-1], full_path.size)
Example #23
0
 def test_validate_matrix(self):
     """Test the ``_validate_matrix`` method with sparse inputs.

     A matrix in compressed sparse row format must be rejected with
     ``ValueError``; the same data in compressed sparse column format
     must be accepted.
     """
     dense = np.random.random(size=(50, 10))
     model = ElasticNet(alpha=.5)

     row_format = csr_matrix(dense)
     with self.assertRaises(ValueError):
         model._validate_matrix(row_format)

     column_format = csc_matrix(dense)
     model._validate_matrix(column_format)
Example #24
0
def _parallel_permute_count_nonzero_penalised_coefs(xp, yp, lam_path,
                                                    penalties, norm_num,
                                                    is_regression):
    """Fit a glmnet path on a permuted response and count selected terms.

    ``yp`` is shuffled in place, then an ``ElasticNet`` (regression) or
    ``LogitNet`` (otherwise) is fit along ``lam_path`` with ``norm_num`` as
    alpha and ``penalties`` as relative penalties.  The result is the sum
    over axis 0 of the sign of ``|coef_path_|`` multiplied by
    ``vec_to_array(penalties)``.
    """
    # Imported here rather than at module level, as in the original code.
    from glmnet import ElasticNet, LogitNet

    np.random.shuffle(yp)

    estimator_cls = ElasticNet if is_regression else LogitNet
    permuted_model = estimator_cls(alpha=norm_num, lambda_path=lam_path)
    permuted_model.fit(xp, yp, relative_penalties=penalties)

    magnitudes = np.abs(np.squeeze(permuted_model.coef_path_))
    penalised = magnitudes * vec_to_array(penalties)
    return np.sign(penalised).sum(axis=0)
Example #25
0
    def run(self, epochs: int):
        """Fit the glmnet path and fire the metric event for every lambda.

        ``epochs`` is ignored; it is only accepted for compatibility with
        the rprml.core.Executor interface.  The fitted lambdas (without the
        artificial leading one) and ``self.alpha`` are stored in
        ``self.executor.history``.
        """
        print("Starting ElasticNet simulation.")

        # The data loaders are needed for computing metrics via the
        # self.executor object.
        self._reset_data_loaders()

        features = self.train_dataset.X.cpu().detach().numpy()
        targets = self.train_dataset.y.cpu().detach().numpy()

        # glmnet expects the lambda path in decreasing order.
        descending = -np.sort(-np.array(self.lambdas))
        # The glmnet package modifies the first lambda, so an infinite
        # sentinel is put in front and discarded again after the fit.
        padded_path = np.insert(descending, 0, np.inf, axis=0)

        path_model = ElasticNet(alpha=self.alpha,
                                n_splits=0,
                                fit_intercept=False,
                                standardize=False,
                                lambda_path=padded_path)
        path_model.fit(features, targets.squeeze())

        # Put lambda on the first axis, then drop the sentinel entry
        # together with its fitted coefficient vector.
        coefs = np.array(np.swapaxes(path_model.coef_path_, 0, 1))[1:, :]
        lambdas = np.array(path_model.lambda_path_).flatten()[1:]

        history = self.executor.history
        history['lambdas'] = lambdas
        history['alpha'] = self.alpha

        # Evaluate the registered metrics for each fitted model.
        for fitted_w in coefs:
            w_lambda = torch.tensor(fitted_w,
                                    dtype=torch.float32,
                                    device=self.device)
            self.model.set_w(w_lambda)
            self.trainer.fire_event(_iteration_level_event)
def FeatureSelection(df_x, xtrain, ytrain, exclude_cols=[]):
    """Select features with a cross-validated lasso.

    An ElasticNet with alpha=1 (lasso), 10 CV splits and 4 jobs is fit on
    the training data; the features with a non-zero coefficient are
    reported.  ``exclude_cols`` is accepted but not used.

    Returns
    -------
    tuple
        ``(names, indices)``: the column names of ``df_x`` and the
        positional indices of the selected features.
    """
    # A GAM-based selector (LinearGAM, p-values below 0.1) was prototyped
    # here earlier and is currently disabled.

    selector = ElasticNet(alpha=1, n_splits=10, random_state=123, n_jobs=4)
    selector.fit(xtrain, ytrain)
    selected_idx = np.where(selector.coef_ != 0.)[0]
    selected_names = list(df_x.columns[selected_idx])
    return (selected_names, selected_idx)
Example #27
0
def train_and_test(basename):
    """Fit one cross-validated elastic net per country and save the results.

    Reads ``train_test_<basename>.npz`` (keys ``train_x``, ``train_y``,
    ``test_x``, ``test_y``, ``countrylist``).  For each column ``i`` of
    ``train_y`` an ``ElasticNet(alpha=.1)`` wrapped in a 10-fold
    ``CVGlmNet`` is fit on ``train_x``; the coefficients at the best lambda,
    the test predictions, the prediction errors and ``1 - var(error) /
    var(truth)`` are collected and written to ``results_<basename>.npz``.
    """
    d = np.load("train_test_" + basename + ".npz")
    train_x = d["train_x"]
    train_y = d["train_y"]
    test_x = d["test_x"]
    test_y = d["test_y"]
    countrylist = d["countrylist"]
    numcountries = train_x.shape[1]
    weights = np.zeros((numcountries, numcountries))
    enets = [None] * numcountries
    enet_cvs = [None] * numcountries
    preds = np.zeros(test_y.shape)
    errors = np.zeros(test_y.shape)
    var_ratio = np.zeros((numcountries))
    # The loop body used to be indented with tabs under a space-indented
    # ``for``, which Python 3 rejects with a TabError; it now uses spaces.
    for i in range(numcountries):
        enets[i] = ElasticNet(alpha=.1)
        enet_cvs[i] = CVGlmNet(enets[i], n_folds=10, n_jobs=10)
        enet_cvs[i].fit(train_x, train_y[:, i])
        bli = enet_cvs[i].best_lambda_idx
        weights[i, :] = (
            enet_cvs[i].base_estimator.get_coefficients_from_lambda_idx(bli))
        preds[:, i] = enet_cvs[i].predict(test_x)
        errors[:, i] = test_y[:, i] - preds[:, i]
        var_truth = np.var(test_y[:, i])
        var_err = np.var(errors[:, i])
        # Fraction of the test variance explained by the predictions.
        var_ratio[i] = 1 - var_err / var_truth
        print("finished predicting country number %d" % i)
    np.savez("results_" + basename + ".npz",
             preds=preds,
             truth=test_y,
             errors=errors,
             var_ratio=var_ratio,
             countrylist=countrylist,
             weights=weights)
Example #28
0
def glmnet_box():
    """Fit ridge (alpha=0) models to the Box-Cox transformed coordinates.

    The same estimator is fit first on the latitude and then on the
    longitude labels.  For each target the in-sample r2 is printed and a
    residual plot of the back-transformed predictions is produced.
    """
    ridge = ElasticNet(n_splits=20, scoring='r2', alpha=0)

    # Latitude.
    ridge.fit(music_features, box_latitude_label)
    lat_score = ridge.score(music_features, box_latitude_label)
    print('GLMNET ridge lattitude r2 {}'.format(lat_score))
    lat_fitted = inverse_box_cox(ridge.predict(music_features), lambda_lat,
                                 90)
    plot_predictions(lat_fitted, latitude_label,
                     'ridge_latitude_residual.png',
                     'residual vs fitted latitude for Ridge')

    # Longitude.
    ridge.fit(music_features, box_longitude_label)
    lon_score = ridge.score(music_features, box_longitude_label)
    print('GLMNET ridge longitude r2 {}'.format(lon_score))
    lon_fitted = inverse_box_cox(ridge.predict(music_features), lambda_lon,
                                 180)
    plot_predictions(lon_fitted, longitude_label,
                     'ridge_longitude_residual.png',
                     'residual vs fitted longitude for Ridge regression')
Example #29
0
def glmnet_lasso():
    """Fit lasso (alpha=1) models to the Box-Cox transformed coordinates.

    Latitude is handled first, then longitude, re-using one estimator.
    Each step prints the in-sample r2 and plots the residuals of the
    back-transformed predictions against the untransformed labels.
    """
    lasso = ElasticNet(n_splits=20, scoring='r2', alpha=1)

    lasso.fit(music_features, box_latitude_label)
    r2_latitude = lasso.score(music_features, box_latitude_label)
    print('GLMNET lasso latitude r2 {}'.format(r2_latitude))
    latitude_pred = inverse_box_cox(lasso.predict(music_features),
                                    lambda_lat, 90)
    plot_predictions(latitude_pred, latitude_label,
                     'lasso_latitude_residual.png',
                     'residual vs fitted latitude for lasso regression')

    lasso.fit(music_features, box_longitude_label)
    r2_longitude = lasso.score(music_features, box_longitude_label)
    print('GLMNET lasso longitude r2 {}'.format(r2_longitude))
    longitude_pred = inverse_box_cox(lasso.predict(music_features),
                                     lambda_lon, 180)
    plot_predictions(longitude_pred, longitude_label,
                     'lasso_longitude_residual.png',
                     'residual vs fitted longitude for lasso regression')
Example #30
0
def test_linear_glmnet(benchmark, alpha, n_obs):
    """Benchmark ``ElasticNet.fit`` on simulated linear data.

    ``n_obs`` observations of four normally distributed predictors are
    generated from a seeded generator; the response is a fixed linear
    combination plus standard normal noise.
    """
    rng = np.random.default_rng(SEED)
    noise = rng.normal(loc=0, scale=1, size=n_obs)
    design = rng.normal(loc=5, scale=2, size=(n_obs, 4))
    coefficients = np.array([1, -2, 0.5, 1])
    response = design.dot(coefficients) + noise
    model = ElasticNet(alpha=alpha)
    benchmark(model.fit, design, response)
Example #31
0
 def test_unregularized_with_weights(self):
     """Unregularized fits (lambda=0) with sample weights recover the truth.

     The response is an exact linear function of the predictors.  For each
     alpha in (0, .5, 1) and for dense and CSC-sparse inputs, the
     predictions must match the response within 0.01 and the coefficients
     must match the true ones within 0.02.
     """
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     response = np.dot(dense, true_coefs)
     sample_weights = np.random.uniform(size=(5000,))
     for mixing in [0, .5, 1]:
         for design in (dense, sparse):
             model = ElasticNet(alpha=mixing)
             model.fit(design, response, lambdas=[0],
                       weights=sample_weights)
             fitted = model.predict(design).ravel()
             self.assertTrue(np.allclose(fitted, response, atol=.01))
             estimates = model._coefficients.ravel()
             self.assertTrue(np.allclose(estimates, true_coefs, atol=.02))
Example #32
0
 def test_ridge_with_weights(self):
     """Pure ridge (alpha=0) with sample weights shrinks uniformly.

     Runs on dense and CSC-sparse matrices with weights normalised to sum
     to one.  For each of ten penalties in [0, 1] the ratios of fitted to
     true coefficients, divided by their maximum, must be within 0.05 of
     one.
     """
     dense = np.random.random(size=(50000, 3))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(3, ))
     raw_weights = np.random.uniform(size=(50000, ))
     sample_weights = raw_weights / np.sum(raw_weights)
     for design in (dense, sparse):
         for penalty in np.linspace(0, 1, 10):
             response = np.dot(dense, true_coefs)
             model = ElasticNet(alpha=0)
             model.fit(design, response, lambdas=[penalty],
                       weights=sample_weights)
             shrinkage = model._coefficients.ravel() / true_coefs
             scaled = shrinkage / np.max(shrinkage)
             self.assertTrue(np.allclose(scaled, 1, atol=.05))
Example #33
0
 def test_lasso_with_weights(self):
     '''Test that a pure lasso (alpha=1) model gives expected results when
     sample weights are used.

     For each ``w_mask`` in 1..9 all true coefficients from position
     ``w_mask`` onwards are set to zero before the response is generated.
     The weighted lasso fit at lambda=.01 is expected to keep exactly
     ``w_mask`` non-zero coefficients, on dense and sparse inputs.
     '''
     Xdn = np.random.random(size=(25000, 10))
     Xsp = csc_matrix(Xdn)
     w = np.random.random(size=(10, ))
     sw = np.random.uniform(size=(25000, ))
     sw = sw / np.sum(sw)
     for w_mask in range(1, 10):
         for X in (Xdn, Xsp):
             w_masked = w.copy()
             w_masked[w_mask:] = 0
             y = np.dot(Xdn, w_masked)
             enet = ElasticNet(alpha=1)
             enet.fit(X, y, lambdas=[.01], weights=sw)
             # The old check took ``len(coefs == w_mask)``, i.e. the length
             # of a boolean array, which is always truthy.  Compare the
             # number of selected coefficients instead.
             n_selected = np.count_nonzero(enet._coefficients)
             self.assertEqual(n_selected, w_mask)
Example #34
0
def l1_l2_regression(alpha):
    """Fit elastic nets with mixing parameter ``alpha`` to both coordinates.

    The estimator is fit on the Box-Cox transformed latitude and then on
    the transformed longitude.  After each fit the in-sample r2 is printed
    and the residuals of the back-transformed predictions are plotted to a
    file whose name contains ``alpha``.
    """
    net = ElasticNet(n_splits=20, scoring='r2', alpha=alpha)

    # Latitude.
    net.fit(music_features, box_latitude_label)
    r2_lat = net.score(music_features, box_latitude_label)
    print('GLMNET L1 L2 alpha {} latitude r2 {}'.format(alpha, r2_lat))
    lat_fitted = inverse_box_cox(net.predict(music_features), lambda_lat, 90)
    lat_file = 'l1_l2_latitude_residual_{}.png'.format(alpha)
    lat_title = (
        'residual vs fitted latitude for l1_l2 \n regression alpha {}'.format(
            alpha))
    plot_predictions(lat_fitted, latitude_label, lat_file, lat_title)

    # Longitude.
    net.fit(music_features, box_longitude_label)
    r2_lon = net.score(music_features, box_longitude_label)
    print('GLMNET L1 L2 alpha {} longitude r2 {}'.format(alpha, r2_lon))
    lon_fitted = inverse_box_cox(net.predict(music_features), lambda_lon, 180)
    lon_file = 'l1_l2_longitude_residual_{}.png'.format(alpha)
    lon_title = (
        'residual vs fitted longitude for l1_l2 \n regression alpha {}'.format(
            alpha))
    plot_predictions(lon_fitted, longitude_label, lon_file, lon_title)
Example #35
0
 def test_lasso_with_weights(self):
     '''Test that a pure lasso (alpha=1) model gives expected results when
     sample weights are used.

     The true coefficient vector is truncated to its first ``w_mask``
     entries (the rest are exactly zero) and a weighted lasso is fit at
     lambda=.01 on dense and sparse inputs.  The fit must select exactly
     ``w_mask`` coefficients.
     '''
     Xdn = np.random.random(size=(25000,10))
     Xsp = csc_matrix(Xdn)
     w = np.random.random(size=(10,))
     sw = np.random.uniform(size=(25000,))
     sw = sw / np.sum(sw)
     for w_mask in range(1, 10):
         for X in (Xdn, Xsp):
             w_masked = w.copy()
             w_masked[w_mask:] = 0
             y = np.dot(Xdn, w_masked)
             enet = ElasticNet(alpha=1)
             enet.fit(X, y, lambdas=[.01], weights=sw)
             # A misplaced parenthesis used to turn this check into
             # ``len(<bool array>)``, which can never be falsy; count the
             # non-zero coefficients and compare that number to w_mask.
             n_nonzero = np.count_nonzero(enet._coefficients)
             self.assertEqual(n_nonzero, w_mask)
Example #36
0
    def test_coef_interpolation(self):
        """Predictions at an off-path lambda differ from its neighbours.

        A lambda halfway between the second and third values of the fitted
        path is used; the predictions there must not be close to the
        predictions at either of the two path values.
        """
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=1729)
        model = model.fit(features, target)

        # Two neighbouring path values and a point strictly between them.
        lower, upper = model.lambda_path_[1], model.lambda_path_[2]
        between = (lower + upper) / 2.0

        at_lower = model.predict(features, lamb=lower)
        at_upper = model.predict(features, lamb=upper)
        at_between = model.predict(features, lamb=between)

        self.assertFalse(np.allclose(at_lower, at_between))
        self.assertFalse(np.allclose(at_upper, at_between))
Example #37
0
 def test_unregularized_with_weights(self):
     """Check weighted, unregularized (lambda=0) fits against the truth.

     With an exactly linear response, every combination of alpha in
     (0, .5, 1) and input format (dense, CSC sparse) must reproduce the
     response within 0.01 and the generating coefficients within 0.02.
     """
     x_dense = np.random.random(size=(5000, 10))
     x_sparse = csc_matrix(x_dense)
     beta = np.random.random(size=(10, ))
     target = np.dot(x_dense, beta)
     obs_weights = np.random.uniform(size=(5000, ))
     for alpha in [0, .5, 1]:
         for x_variant in (x_dense, x_sparse):
             net = ElasticNet(alpha=alpha)
             net.fit(x_variant, target, lambdas=[0], weights=obs_weights)
             preds_ok = np.allclose(net.predict(x_variant).ravel(),
                                    target,
                                    atol=.01)
             self.assertTrue(preds_ok)
             coefs_ok = np.allclose(net._coefficients.ravel(),
                                    beta,
                                    atol=.02)
             self.assertTrue(coefs_ok)
Example #38
0
 def test_unregularized_models(self):
     """Unregularized fits (lambda=0) capture a perfect linear relation.

     The response is generated without noise, so for dense and CSC-sparse
     model matrices and for alpha in (0, .5, 1) the predictions must equal
     the response within 0.01 and the fitted parameters must equal the
     true coefficients within 0.02.
     """
     dense = np.random.random(size=(5000, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     response = np.dot(dense, true_coefs)
     for mixing in [0, .5, 1]:
         for design in (dense, sparse):
             model = ElasticNet(alpha=mixing)
             model.fit(design, response, lambdas=[0])
             fitted = model.predict(design).ravel()
             self.assertTrue(np.allclose(fitted, response, atol=.01))
             estimates = model._coefficients.ravel()
             self.assertTrue(np.allclose(estimates, true_coefs, atol=.02))
Example #39
0
 def test_ridge_with_weights(self):
     """Check uniform shrinkage of weighted pure ridge (alpha=0) fits.

     Sample weights are drawn uniformly and normalised to sum to one.  On
     dense and CSC-sparse matrices, and for ten penalties between 0 and 1,
     the max-normalised ratios of fitted to true coefficients must all be
     within 0.05 of one.
     """
     x_dense = np.random.random(size=(50000, 3))
     x_sparse = csc_matrix(x_dense)
     beta = np.random.random(size=(3,))
     obs_weights = np.random.uniform(size=(50000,))
     obs_weights = obs_weights / np.sum(obs_weights)
     for x_variant in (x_dense, x_sparse):
         for lam in np.linspace(0, 1, 10):
             target = np.dot(x_dense, beta)
             ridge = ElasticNet(alpha=0)
             ridge.fit(x_variant, target, lambdas=[lam],
                       weights=obs_weights)
             coef_ratios = ridge._coefficients.ravel() / beta
             relative = coef_ratios / np.max(coef_ratios)
             is_uniform = np.allclose(relative, 1, atol=.05)
             self.assertTrue(is_uniform)
Example #40
0
 def test_max_lambda_with_weights(self):
     """Fortran and Python max_lambda agree when sample weights are used.

     The Fortran value is reconstructed from the fitted path as
     ``ol[1] * (ol[1] / ol[2])`` and compared, to four places, with
     ``_max_lambda`` computed in Python, on dense and sparse matrices and
     for alpha in (.01, .5, 1).
     """
     dense = np.random.random(size=(50, 10))
     sparse = csc_matrix(dense)
     true_coefs = np.random.random(size=(10,))
     response = np.dot(dense, true_coefs)
     sample_weights = np.random.uniform(size=(50,))
     for mixing in [.01, .5, 1]:
         for design in (dense, sparse):
             model = ElasticNet(alpha=mixing)
             model.fit(design, response, weights=sample_weights)
             path = model.out_lambdas
             from_fortran = path[1] * (path[1] / path[2])
             from_python = model._max_lambda(design, response,
                                             weights=sample_weights)
             self.assertAlmostEqual(from_fortran, from_python, 4)
Example #41
0
 def test_lasso_models(self):
     '''Test that a pure lasso (alpha=1) model gives expected results
     for both dense and sparse design matrices.

       We test that the lasso model has the ability to pick out zero
     parameters from a linear relationship.  To see this, we generate
     linearly related data where some number of the coefficients are
     exactly zero, and make sure the lasso model keeps exactly the
     remaining ``w_mask`` coefficients.
     '''
     Xdn = np.random.random(size=(25000,10))
     Xsp = csc_matrix(Xdn)
     w = np.random.random(size=(10,))
     for w_mask in range(1, 10):
         for X in (Xdn, Xsp):
             w_masked = w.copy()
             w_masked[w_mask:] = 0
             y = np.dot(Xdn, w_masked)
             enet = ElasticNet(alpha=1)
             enet.fit(X, y, lambdas=[.01])
             # ``len(coefs == w_mask)`` measured the length of a boolean
             # array and was therefore always truthy; the number of
             # non-zero coefficients is what has to equal w_mask.
             n_selected = np.count_nonzero(enet._coefficients)
             self.assertEqual(n_selected, w_mask)
Example #42
0
 def test_max_lambda(self):
     """Fortran and Python computations of max_lambda must agree.

     The comparison is made on dense and sparse matrices for alpha in
     (.01, .5, 1).  The Fortran value is reconstructed from the second and
     third entries of the fitted lambda path.

     Note that the implementation of max_lambda for alpha=0 in the Fortran
     code is unknown, so it is currently not tested.
     """
     x_dense = np.random.random(size=(50, 10))
     x_sparse = csc_matrix(x_dense)
     beta = np.random.random(size=(10,))
     target = np.dot(x_dense, beta)
     for alpha in [.01, .5, 1]:
         for x_variant in (x_dense, x_sparse):
             net = ElasticNet(alpha=alpha)
             net.fit(x_variant, target)
             lambdas = net.out_lambdas
             fortran_value = lambdas[1] * (lambdas[1] / lambdas[2])
             python_value = net._max_lambda(x_variant, target)
             self.assertAlmostEqual(fortran_value, python_value, 4)
    def test_lambda_clip_warning(self):
        """Predicting outside the fitted lambda range must warn.

        A ``RuntimeWarning`` is expected both above the first and below the
        last value of ``lambda_path_``.
        """
        features, target = self.inputs[0]
        model = ElasticNet(n_splits=0, random_state=1729)
        model = model.fit(features, target)

        # lambda_path_ is in decreasing order: the first entry is the
        # largest value and the last entry the smallest.
        above_range = model.lambda_path_[0] + 1
        below_range = model.lambda_path_[-1] - 1

        with self.assertWarns(RuntimeWarning):
            model.predict(features, lamb=above_range)

        with self.assertWarns(RuntimeWarning):
            model.predict(features, lamb=below_range)
Example #44
0
 def test_validate_inputs(self):
     """Check which response vectors ``_validate_inputs`` accepts.

     For a 50 x 10 design matrix, a response of length 49 and a response
     of length 10 must raise ``ValueError``; a response of length 50 must
     pass.
     """
     design = np.random.random(size=(50, 10))
     coefs = np.random.random(size=(10,))  # drawn but not used below
     model = ElasticNet(alpha=.5)

     # Invalid: response that is too short.
     too_short = np.random.random(size=(49,))
     with self.assertRaises(ValueError):
         model._validate_inputs(design, too_short)

     # Invalid: response matching the wrong dimension of X.
     wrong_axis = np.random.random(size=(10,))
     with self.assertRaises(ValueError):
         model._validate_inputs(design, wrong_axis)

     # Valid: response of the correct dimension.
     model._validate_inputs(design, np.random.random(size=(50,)))
Example #45
0
 def test_validate_excl_preds(self):
     """Check which exclusion arrays ``_validate_excl_preds`` accepts.

     For a design matrix with 10 predictors, an array of 12 entries and an
     array of 11 entries containing the value 10 must raise
     ``ValueError``; excluding predictors 1, 2, 4, 6 and 8 must pass.
     """
     design = np.random.random(size=(50, 10))
     coefs = np.random.random(size=(10,))
     response = np.dot(design, coefs)
     model = ElasticNet(alpha=.5)

     # Invalid: excl_preds array that is too long.
     too_long = np.ones(shape=(12,))
     with self.assertRaises(ValueError):
         model._validate_excl_preds(design, response, excl_preds=too_long)

     # Invalid: attempt to exclude a predictor that is out of range,
     # i.e. that does not exist.
     out_of_range = np.ones(shape=(11,))
     out_of_range[0] = 1
     out_of_range[5] = 10
     with self.assertRaises(ValueError):
         model._validate_excl_preds(design, response,
                                    excl_preds=out_of_range)

     # Valid: exclude some in-range predictors.
     in_range = np.array([1, 2, 4, 6, 8])
     model._validate_excl_preds(design, response, excl_preds=in_range)
    def test_relative_penalties(self):
        """Unpenalized coefficients must not shrink relative to penalized.

        For every fixture a first model is fit without relative penalties.
        The coefficients it selects get a relative penalty of zero in a
        second fit, all others a penalty of one.  Every coefficient of the
        second fit must then be at least as large in absolute value as in
        the first.
        """
        penalized = ElasticNet(random_state=4328)
        relaxed = ElasticNet(random_state=4328)
        for features, target in self.inputs:
            n_features = features.shape[1]

            # Baseline fit, no relative penalties applied.
            penalized.fit(features, target)

            # Remove the penalty from the coefficients selected above.
            selected = np.nonzero(penalized.coef_)
            relative = np.repeat(1, n_features)
            relative[selected] = 0

            # Refit with the selected coefficients unpenalized.
            relaxed.fit(features, target, relative_penalties=relative)

            baseline_size = np.abs(penalized.coef_)
            relaxed_size = np.abs(relaxed.coef_)
            assert np.all(baseline_size <= relaxed_size)
Example #47
0
from sklearn.datasets import make_regression

# Demo script: fit an elastic net on simulated data, print the model and a
# few predictions, then plot the coefficient paths.  The print statements
# use call syntax so that the script also runs on Python 3.
display_bar = '-' * 70

# Simulated regression problem: 5000 samples, 100 features of which 30 are
# informative, with a low effective rank and a little noise.
X, y = make_regression(
    n_samples=5000,
    n_features=100,
    n_informative=30,
    effective_rank=40,
    noise=.1,
)

print(display_bar)
print("Fit an elastic net on some fake data")
print(display_bar)

enet = ElasticNet(alpha=.025)
enet.fit(X, y)

print(enet)

print(display_bar)
print("Predictions vs. actuals for the last elastic net model:")
print(display_bar)

preds = enet.predict(X)
print(y[:10])
# Last column of the prediction matrix -- presumably the model for the
# final lambda on the path (confirm against ElasticNet.predict).
print(preds[:10, np.shape(preds)[1] - 1])

enet.plot_paths()
 def test_alphas(self):
     """Fit once per configured alpha and check the in-sample r2.

     Every value in ``self.alphas`` is used on the first fixture; the
     predictions are passed to ``check_r2_score`` with threshold 0.90.
     """
     features, target = self.inputs[0]
     for mixing in self.alphas:
         model = ElasticNet(alpha=mixing, random_state=2465)
         model = model.fit(features, target)
         predictions = model.predict(features)
         self.check_r2_score(target, predictions, 0.90, alpha=mixing)
 def test_cv_scoring(self):
     """Fit once per configured CV scoring method and check the r2.

     Every entry of ``self.scoring`` is used on the first fixture; the
     predictions are passed to ``check_r2_score`` with threshold 0.90.
     """
     features, target = self.inputs[0]
     for scorer in self.scoring:
         model = ElasticNet(scoring=scorer, random_state=1729)
         model = model.fit(features, target)
         predictions = model.predict(features)
         self.check_r2_score(target, predictions, 0.90, scoring=scorer)