Ejemplo n.º 1
0
def ode_integratedlasso_rank_vars(D,
                                  times,
                                  target,
                                  env=None,
                                  silent=True,
                                  interactions=True,
                                  rm_target=True):
    """Rank candidate predictors by the order in which they enter a glmnet path.

    The response is the per-step difference of the target variable and the
    predictors are trapezoidal integrals of every variable over each time
    step. An ``ElasticNet`` is fitted and terms are ranked by the index of
    the first path position at which their coefficient becomes non-zero.

    Parameters
    ----------
    D : 2-D array
        One row per repetition; variable ``j`` occupies columns
        ``j * L:(j + 1) * L`` where ``L = len(times)``.
    times : sequence
        Time points; ``np.diff(times)`` gives the integration step widths.
    target : int
        Index of the variable whose increments are regressed.
    env, silent :
        Accepted for interface compatibility; not used by this function.
    interactions : bool
        If true, also include all pairwise products (including squares) of
        the integrated predictors.
    rm_target : bool
        If true, drop entries equal to ``target`` from the ranking.

    Returns
    -------
    dict
        ``'ranking'``: variable indices (or, with ``interactions``, an object
        array of index lists) ordered by first entrance; ``'coef'``: the
        fitted model's ``coef_``.
    """
    L = len(times)
    d = D.shape[1] // L
    n = D.shape[0]
    step_widths = np.diff(times)

    Xint = np.zeros([(L - 1) * n, d])
    deltaY = np.zeros([(L - 1) * n])
    for i in range(n):
        rows = slice(i * (L - 1), (i + 1) * (L - 1))
        deltaY[rows] = np.diff(D[i, target * L:(target + 1) * L])
        for j in range(d):
            tmp = D[i, j * L:(j + 1) * L]
            # Trapezoidal rule on each interval.
            Xint[rows, j] = (tmp[:(L - 1)] + tmp[1:]) / 2 * step_widths

    # Remove rows containing NaN in either the response or any predictor.
    na_ind = np.isnan(deltaY) | np.isnan(Xint).any(axis=1)
    deltaY = deltaY[~na_ind]
    Xint = Xint[~na_ind]

    if interactions:
        # Main effects [i] followed by pairs [i, j] with j <= i.
        terms = [[i] for i in range(d)]
        terms += [[i, j] for i in range(d) for j in range(i + 1)]
        # ``np.object`` was removed from NumPy; the builtin ``object`` is the
        # supported spelling. Fill element-wise so each cell holds a list.
        var_names = np.empty(len(terms), dtype=object)
        for k, term in enumerate(terms):
            var_names[k] = term

        # Size from the NaN-filtered matrix; using n * (L - 1) here breaks as
        # soon as any row has been dropped above.
        Xint_interactions = np.zeros([Xint.shape[0], len(var_names)])
        Xint_interactions[:, :d] = Xint
        for k in range(d, len(var_names)):
            first, second = var_names[k]
            Xint_interactions[:, k] = Xint[:, first] * Xint[:, second]
        fit = ElasticNet().fit(Xint_interactions, deltaY)
        sel_matrix = np.abs(fit.coef_path_) > 1e-7
    else:
        fit = ElasticNet().fit(Xint, deltaY)
        sel_matrix = fit.coef_path_ != 0

    # Index of the first path position where each term is selected
    # (assumes coef_path_ is laid out as terms x path positions -- TODO
    # confirm). ``max`` would only yield a boolean "ever selected" flag.
    first_entrance = sel_matrix.argmax(axis=1).astype(float)
    # Terms that never enter get an infinite entrance time (sorted last).
    first_entrance[~sel_matrix.any(axis=1)] = np.inf
    ranking = first_entrance.argsort(kind="stable")
    if interactions:
        ranking = var_names[ranking]

    if rm_target:
        # NOTE(review): with interactions=True the entries are lists, so the
        # comparison against the integer ``target`` does not appear to remove
        # anything -- confirm the intended behaviour.
        ranking = ranking[ranking != target]

    return ({'ranking': ranking, 'coef': fit.coef_})
Ejemplo n.º 2
0
 def test_one_row_predict(self):
     """Predicting on a single row must produce exactly one output value."""
     model = ElasticNet(random_state=42)
     for features, response in self.inputs:
         model.fit(features, response)
         single_row = features[0].reshape((1, -1))
         prediction = model.predict(single_row)
         assert prediction.shape == (1,)
    def _notify(self, trainer):
        """Fit a glmnet path on the trainer's data once and record metrics.

        Does nothing when ``self.computed`` is already set. Otherwise fits an
        ``ElasticNet`` (no CV splits, no intercept) on a 200-point
        log-spaced lambda grid, stores the lambdas and coefficients on
        ``self``, and calls ``append_performance_metrics`` for every fitted
        coefficient vector.
        """
        if self.computed:
            return

        X = trainer.X.cpu().detach().numpy()
        y = trainer.y.cpu().detach().numpy()

        # Log-spaced grid between 10**(-9) and 10.
        lambda_grid = np.exp(
            np.linspace(start=np.log(10**(-9)), stop=np.log(10), num=200))
        solver = ElasticNet(n_splits=0,
                            fit_intercept=False,
                            lambda_path=lambda_grid)
        solver.fit(X, y.squeeze())

        self.lambdas = solver.lambda_path_
        # Put lambda on the first axis so it lines up with the time axis of
        # gradient-descent iterates: resulting shape is (n_lambda, n_params).
        self.coefs = np.swapaxes(solver.coef_path_, 0, 1)

        n_lambdas = self.coefs.shape[0]
        for idx in range(n_lambdas):
            self.append_performance_metrics(self.coefs[idx, :], trainer)

        if self.store_path is False:
            # Placeholder array so that _aggregate_numeric_results works.
            self.coefs = [np.array([[-1], [-1]])]

        self.computed = True
Ejemplo n.º 4
0
    def TL(self, x, y, pen_bic, pen_gic):
        """Pick path coefficients by a BIC-style score, then threshold them.

        An ``ElasticNet`` path is fitted on ``(x, y)``. The path column
        minimising ``RSS + pen_bic * (#non-zero)`` is selected; its
        coefficients are then hard-thresholded at each of its positive
        values and the thresholded vector minimising
        ``RSS + pen_gic * (#non-zero)`` is returned.

        Parameters
        ----------
        x : array
            Predictors; a 1-D array is treated as a single column.
        y : array
            Response.
        pen_bic, pen_gic : float
            Per-coefficient penalties for the two selection steps.

        Returns
        -------
        (beta_gic, intercept)
            Thresholded coefficients (all zeros when the selected vector has
            no positive entry) and the intercept of the selected path column.

        Raises
        ------
        ValueError
            If no path column yields a finite score.
        """
        if len(x.shape) < 2:
            # ``reshape`` returns a new array; the original code discarded
            # it, so 1-D input was never actually turned into a column.
            x = x.reshape((x.shape[0], 1))

        m = ElasticNet().fit(x, y)
        betas = m.coef_path_
        intercepts = m.intercept_path_

        best_bic = np.inf
        beta_bic = None
        intercept = None
        for i in range(betas.shape[1]):
            rss = np.sum((y - np.matmul(x, betas[:, i]) - intercepts[i])**2)
            k = np.sum(betas[:, i] != 0)
            bic = rss + pen_bic * k
            if bic < best_bic:
                best_bic = bic
                beta_bic = betas[:, i]
                intercept = intercepts[i]
        if beta_bic is None:
            raise ValueError(
                "no point on the regularization path produced a finite score")

        # NOTE(review): thresholds and the cut below use signed values, so
        # negative coefficients are always zeroed -- confirm whether
        # magnitudes (np.abs) were intended.
        thresholds = beta_bic[beta_bic > 0]
        thresholds.sort()
        beta_gic = np.zeros_like(beta_bic)
        best_gic = np.inf
        for delta in thresholds:
            beta_thres = beta_bic.copy()
            beta_thres[beta_thres < delta] = 0
            rss = np.sum((y - np.matmul(x, beta_thres) - intercept)**2)
            k = np.sum(beta_thres != 0)
            gic = rss + pen_gic * k
            if gic < best_gic:
                best_gic = gic
                beta_gic = beta_thres

        return beta_gic, intercept
Ejemplo n.º 5
0
 def test_one_row_predict_with_lambda(self):
     """One row predicted at two lambdas must give a (1, 2) result."""
     model = ElasticNet(random_state=42)
     requested_lambdas = [20, 10]
     for features, response in self.inputs:
         model.fit(features, response)
         single_row = features[0].reshape((1, -1))
         prediction = model.predict(single_row, lamb=requested_lambdas)
         assert prediction.shape == (1, 2)
Ejemplo n.º 6
0
 def test_validate_rel_penalties(self):
     """Exercise _validate_rel_penalties with invalid and valid vectors."""
     X = np.random.random(size=(50, 10))
     w = np.random.random(size=(10, ))
     y = np.dot(X, w)
     enet = ElasticNet(alpha=.5)

     with_negative_entry = np.ones(shape=(10, ))
     with_negative_entry[5] = -1
     rejected = (
         # Too short.
         np.ones(shape=(9, )),
         # Matches the wrong dimension of X.
         np.ones(shape=(50, )),
         # Contains a negative entry.
         with_negative_entry,
         # Has no positive entries.
         np.zeros(shape=(10, )),
     )
     for rel_pens in rejected:
         with self.assertRaises(ValueError):
             enet._validate_rel_penalties(X, y, rel_penalties=rel_pens)

     # Valid use: correct dimension with all non-negative entries.
     accepted = np.ones(shape=(10, ))
     accepted[5] = 0
     enet._validate_rel_penalties(X, y, rel_penalties=accepted)
Ejemplo n.º 7
0
def train_and_test(basename):
    """Fit one cross-validated elastic net per output column and save results.

    Reads ``train_test_<basename>.npz`` (keys ``train_x``, ``train_y``,
    ``test_x``, ``test_y``, ``countrylist``), fits a ``CVGlmNet``-wrapped
    ``ElasticNet(alpha=.1)`` for every column of ``train_y``, predicts on the
    test set, and writes predictions, truth, errors, explained-variance
    ratios, the country list and the coefficient matrix to
    ``results_<basename>.npz``.
    """
    # Close the archive handle once the arrays have been read.
    with np.load("train_test_" + basename + ".npz") as d:
        train_x = d["train_x"]
        train_y = d["train_y"]
        test_x = d["test_x"]
        test_y = d["test_y"]
        countrylist = d["countrylist"]

    numcountries = train_x.shape[1]
    weights = np.zeros((numcountries, numcountries))
    enets = [None] * numcountries
    enet_cvs = [None] * numcountries
    preds = np.zeros(test_y.shape)
    errors = np.zeros(test_y.shape)
    var_ratio = np.zeros((numcountries))
    # The loop body was tab-indented inside a space-indented function, which
    # Python 3 rejects; it is now indented with spaces throughout.
    for i in range(numcountries):
        enets[i] = ElasticNet(alpha=.1)
        enet_cvs[i] = CVGlmNet(enets[i], n_folds=10, n_jobs=10)
        enet_cvs[i].fit(train_x, train_y[:, i])
        bli = enet_cvs[i].best_lambda_idx
        weights[i, :] = enet_cvs[i].base_estimator.get_coefficients_from_lambda_idx(bli)
        preds[:, i] = enet_cvs[i].predict(test_x)
        errors[:, i] = test_y[:, i] - preds[:, i]
        var_truth = np.var(test_y[:, i])
        var_err = np.var(errors[:, i])
        # Fraction of test-set variance explained by the prediction.
        var_ratio[i] = 1 - var_err / var_truth
        print("finished predicting country number %d" % i)
    np.savez("results_" + basename + ".npz",
             preds=preds,
             truth=test_y,
             errors=errors,
             var_ratio=var_ratio,
             countrylist=countrylist,
             weights=weights)
Ejemplo n.º 8
0
    def test_ridge_models(self):
        """Check a pure ridge (alpha=0) fit on dense and sparse matrices.

        When fitted on uncorrelated predictors, ridge is expected to shrink
        all parameter estimates uniformly. The test therefore generates
        linearly related data from a correlation-free model matrix and
        checks that the ratios of fitted to true coefficients form a
        (nearly) constant array.

        More samples are generated here than in the other tests so that the
        data is sufficiently correlation free; otherwise the effect being
        measured does not occur.
        """
        dense_X = np.random.random(size=(50000, 3))
        sparse_X = csc_matrix(dense_X)
        true_w = np.random.random(size=(3, ))
        penalties = np.linspace(0, 1, 10)
        for design in (dense_X, sparse_X):
            for penalty in penalties:
                response = np.dot(dense_X, true_w)
                model = ElasticNet(alpha=0)
                model.fit(design, response, lambdas=[penalty])
                ratios = model._coefficients.ravel() / true_w
                # Normalise by the largest ratio; uniform shrinkage means
                # every normalised ratio is close to 1.
                scaled = ratios / np.max(ratios)
                self.assertTrue(np.allclose(scaled, 1, atol=.05))
Ejemplo n.º 9
0
 def test_validate_weights(self):
     """Exercise _validate_weights with invalid and valid weight vectors."""
     X = np.random.random(size=(50, 10))
     w = np.random.random(size=(10, ))
     y = np.dot(X, w)
     enet = ElasticNet(alpha=.5)

     with_negative_entry = np.ones(shape=(50, ))
     with_negative_entry[25] = -1
     rejected = (
         # Too short.
         np.ones(shape=(49, )),
         # Matches the wrong dimension of X.
         np.ones(shape=(10, )),
         # Contains a negative entry.
         with_negative_entry,
     )
     for sample_weights in rejected:
         with self.assertRaises(ValueError):
             enet._validate_weights(X, y, weights=sample_weights)

     # Valid use: correct dimension with all non-negative entries.
     accepted = np.ones(shape=(50, ))
     enet._validate_weights(X, y, weights=accepted)
Ejemplo n.º 10
0
 def test_max_features(self):
     """The fitted model must not use more features than max_features."""
     limit = 5
     x, y = self.inputs[3]
     fitted = ElasticNet(n_splits=3, random_state=42,
                         max_features=limit).fit(x, y)
     nonzero_count = np.count_nonzero(fitted.coef_)
     self.assertTrue(nonzero_count <= limit)
Ejemplo n.º 11
0
 def test_random_state_cv(self):
     """The random_state given to the model must reach its CV splitter."""
     random_state = 133
     m = ElasticNet(random_state=random_state)
     x, y = self.inputs[0]
     m.fit(x, y)
     # Leftover debugging output (print(dir(m._cv))) removed: it only added
     # noise to the test log and asserted nothing.
     assert m._cv.random_state == random_state
Ejemplo n.º 12
0
    def test_with_single_var(self):
        """Fit on a single predictor column and check the R^2 of the fit."""
        features = np.random.rand(500,1)
        target = (1.3 * features).ravel()

        fitted = ElasticNet(random_state=449065).fit(features, target)
        predictions = fitted.predict(features)
        self.check_r2_score(target, predictions, 0.90)
Ejemplo n.º 13
0
    def test_predict_without_cv(self):
        """Without CV (n_splits=0), predict needs an explicit lambda."""
        x, y = self.inputs[0]
        fitted = ElasticNet(n_splits=0, random_state=340561).fit(x, y)

        # No prediction should be made unless a value is passed for lambda.
        self.assertRaises(ValueError, fitted.predict, x)
Ejemplo n.º 14
0
 def test_coef_limits(self):
     """Coefficients must stay within the requested [-1, 0] limits."""
     x, y = self.inputs[0]
     bounds = dict(lower_limits=np.repeat(-1, x.shape[1]), upper_limits=0)
     fitted = ElasticNet(random_state=5934, alpha=0, **bounds).fit(x, y)
     assert np.all(fitted.coef_ >= -1)
     assert np.all(fitted.coef_ <= 0)
Ejemplo n.º 15
0
def test_linear_glmnet(benchmark, alpha, n_obs):
    """Benchmark ElasticNet.fit on simulated four-predictor linear data."""
    rng = np.random.default_rng(SEED)
    # Draw order (noise first, then design) is kept so the seeded data
    # stays the same.
    noise = rng.normal(loc=0, scale=1, size=n_obs)
    design = rng.normal(loc=5, scale=2, size=(n_obs, 4))
    coefficients = np.array([1, -2, 0.5, 1])
    response = design.dot(coefficients) + noise
    model = ElasticNet(alpha=alpha)
    benchmark(model.fit, design, response)
Ejemplo n.º 16
0
    def test_with_pandas_df(self):
        """Fit using a pandas DataFrame / Series instead of numpy arrays."""
        x, y = make_regression(random_state=561)
        frame = pd.DataFrame(x)
        frame['y'] = y

        predictors = frame.drop(['y'], axis=1)
        model = ElasticNet(n_folds=3, random_state=123)
        fitted = model.fit(predictors, frame.y)
        sanity_check_regression(fitted, x)
Ejemplo n.º 17
0
    def test_with_no_predictor_variance(self):
        """Fitting on a constant predictor column must raise ValueError."""
        constant_x = np.ones((500, 1))
        random_y = np.random.rand(500)

        model = ElasticNet(random_state=561)
        # ``msg`` is the text unittest reports if no ValueError is raised;
        # it is not matched against the exception's own message.
        msg = "All predictors have zero variance (glmnet error no. 7777)."
        with self.assertRaises(ValueError, msg=msg):
            model.fit(constant_x, random_y)
Ejemplo n.º 18
0
 def test_coef_limits(self):
     """Coefficients must stay within the requested [0, 1] limits."""
     x, y = self.inputs[0]
     lower_limits = 0
     upper_limits = np.repeat(1, x.shape[1])
     m = ElasticNet(lower_limits=lower_limits,
                    upper_limits=upper_limits,
                    random_state=5934)
     m = m.fit(x, y)
     # The comparison must happen inside np.all: ``np.all(coef) >= 0``
     # compares a single boolean with 0 and can never fail.
     assert np.all(m.coef_ >= 0)
     assert np.all(m.coef_ <= 1)
Ejemplo n.º 19
0
 def test_n_splits(self):
     """n_splits of 1 or 2 must be rejected; other values must fit."""
     x, y = self.inputs[0]
     for n in self.n_splits:
         m = ElasticNet(n_splits=n, random_state=6601)
         if 0 < n < 3:
             # assertRaisesRegexp is a deprecated alias that was removed in
             # Python 3.12; assertRaisesRegex is the supported name.
             with self.assertRaisesRegex(ValueError,
                                         "n_splits must be at least 3"):
                 m = m.fit(x, y)
         else:
             m = m.fit(x, y)
             sanity_check_regression(m, x)
Ejemplo n.º 20
0
def test_fit_cv_glmnet_comparison():
    """Compare Elnet's selected lambdas with glmnet's ElasticNet."""
    rng = np.random.default_rng(SEED)
    # Draw order (noise first, then design) is kept so the seeded data
    # stays the same.
    noise = rng.normal(loc=0, scale=1, size=100)
    design = rng.normal(loc=5, scale=2, size=(100, 4))
    coefficients = np.array([1, -2, 0.5, 1])
    response = design.dot(coefficients) + noise

    candidate = Elnet(n_splits=3, random_state=182, scoring="r2")
    candidate.fit(design, response)
    reference = ElasticNet(n_splits=3, random_state=182)
    reference.fit(design, response)

    np.testing.assert_almost_equal(candidate.lambda_max_,
                                   reference.lambda_max_)
    np.testing.assert_almost_equal(candidate.lambda_1se_,
                                   reference.lambda_best_[0])
Ejemplo n.º 21
0
    def test_with_defaults(self):
        """Fit with default settings on every input and sanity-check it."""
        model = ElasticNet(random_state=2821)
        for x, y in self.inputs:
            model = model.fit(x, y)
            sanity_check_regression(model, x)

            # The selected lambda index must not exceed the max-score index.
            self.assertTrue(model.lambda_best_inx_ <= model.lambda_max_inx_)

            # Predicting over the whole path gives one column per lambda.
            full_path = model.predict(x, lamb=model.lambda_path_)
            self.assertEqual(full_path.shape[-1], model.lambda_path_.size)
Ejemplo n.º 22
0
    def create_estimator(self):
        """
        Create an estimator.

        Creates an estimator depending on the family of regression:
        ``ElasticNet`` for ``'gaussian'`` and ``LogitNet`` for
        ``'binomial'``, both with ``standardize=False`` and ``cut_point=0``.

        :return: The estimator matching ``self.family``.
        :raises ValueError: If ``self.family`` is not a supported family.
        """
        if self.family == 'gaussian':
            return ElasticNet(standardize=False, cut_point=0)
        if self.family == 'binomial':
            return LogitNet(standardize=False, cut_point=0)
        # Previously an unknown family fell through to ``return estimator``
        # and failed with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported family {!r}; expected 'gaussian' or "
            "'binomial'.".format(self.family))
Ejemplo n.º 23
0
 def test_validate_matrix(self):
     """Test the _validate_matrix method."""
     dense = np.random.random(size=(50, 10))
     enet = ElasticNet(alpha=.5)

     # Invalid use: a sparse matrix in the incorrect (CSR) format.
     csr = csr_matrix(dense)
     self.assertRaises(ValueError, enet._validate_matrix, csr)

     # Valid use: a matrix in compressed sparse column format.
     csc = csc_matrix(dense)
     enet._validate_matrix(csc)
Ejemplo n.º 24
0
    def test_lambda_clip_warning(self):
        """Predicting at a lambda outside lambda_path_ must warn."""
        x, y = self.inputs[0]
        fitted = ElasticNet(n_splits=0, random_state=1729).fit(x, y)

        # lambda_path_ is in decreasing order, so the first entry is the
        # largest lambda and the last entry the smallest.
        above_path = fitted.lambda_path_[0] + 1
        below_path = fitted.lambda_path_[-1] - 1

        for out_of_range in (above_path, below_path):
            with self.assertWarns(RuntimeWarning):
                fitted.predict(x, lamb=out_of_range)
Ejemplo n.º 25
0
def _parallel_permute_count_nonzero_penalised_coefs(xp, yp, lam_path,
                                                    penalties, norm_num,
                                                    is_regression):
    """Fit a glmnet path on a shuffled response and count selected terms.

    ``yp`` is shuffled in place. An ``ElasticNet`` (regression) or
    ``LogitNet`` (otherwise) is fitted along ``lam_path`` with
    ``alpha=norm_num`` and the given relative penalties. The return value is
    the sum over axis 0 of ``sign(|coef_path_| * vec_to_array(penalties))``.
    """
    from glmnet import ElasticNet, LogitNet

    np.random.shuffle(yp)

    estimator_cls = ElasticNet if is_regression else LogitNet
    model = estimator_cls(alpha=norm_num, lambda_path=lam_path)
    model.fit(xp, yp, relative_penalties=penalties)

    magnitudes = np.abs(np.squeeze(model.coef_path_))
    weighted = magnitudes * vec_to_array(penalties)
    return np.sign(weighted).sum(axis=0)
Ejemplo n.º 26
0
    def test_relative_penalties(self):
        """Unpenalized coefficients must be at least as large in magnitude."""
        baseline = ElasticNet(random_state=4328)
        relaxed = ElasticNet(random_state=4328)
        for x, y in self.inputs:
            # Baseline fit without relative penalties.
            baseline.fit(x, y)

            # Remove the penalty from every coefficient the baseline selected.
            selected = np.nonzero(baseline.coef_)
            penalty = np.repeat(1, x.shape[1])
            penalty[selected] = 0

            # Refit with those coefficients unpenalized.
            relaxed.fit(x, y, relative_penalties=penalty)

            # The refit estimates must dominate the penalized ones in
            # absolute value.
            baseline_size = np.abs(baseline.coef_)
            relaxed_size = np.abs(relaxed.coef_)
            assert np.all(baseline_size <= relaxed_size)
Ejemplo n.º 27
0
 def test_edge_cases(self):
     """Edge cases in model specification."""
     X = np.random.random(size=(50, 10))
     w = np.random.random(size=(10, ))
     y = np.dot(X, w)

     # Edge cases: one, then several, lambdas so large that all estimated
     # coefficients are set to zero. This used to break the predict method.
     for huge_lambdas in ([10**5], [10**5, 2 * 10**5]):
         model = ElasticNet(alpha=1)
         model.fit(X, y, lambdas=huge_lambdas)
         _ = model.predict(X)

     # Edge case: some predictors have zero variance. This used to break
     # the computation of the maximum lambda.
     X = np.random.random(size=(50, 10))
     X[:, 2] = 0
     X[:, 8] = 1
     y = np.dot(X, w)
     model = ElasticNet(alpha=.1)
     model.fit(X, y)
     ol = model.out_lambdas
     max_lambda_from_fortran = ol[1] * (ol[1] / ol[2])
     max_lambda_from_python = model._max_lambda(X, y)
     self.assertAlmostEqual(max_lambda_from_fortran,
                            max_lambda_from_python, 4)

     # Edge case: all predictors have zero variance, which is an error in
     # specification.
     X = np.ones(shape=(50, 10))
     model = ElasticNet(alpha=.1)
     self.assertRaises(ValueError, model.fit, X, y)
Ejemplo n.º 28
0
    def run(self, epochs: int):
        """Fit a glmnet path on the training data and fire metrics per lambda.

        ``epochs`` is ignored; it exists only for compatibility with the
        rprml.core.Executor class.
        """
        print("Starting ElasticNet simulation.")

        # Data loaders are needed by the metrics computed through
        # self.executor.
        self._reset_data_loaders()

        X = self.train_dataset.X.cpu().detach().numpy()
        y = self.train_dataset.y.cpu().detach().numpy()

        # glmnet expects the lambda path in decreasing order.
        descending = -np.sort(-np.array(self.lambdas))
        # Put an infinite lambda in front: the glmnet package modifies the
        # first lambda, so a sacrificial entry is added and dropped below.
        lambda_path = np.insert(descending, 0, float("inf"), axis=0)

        solver = ElasticNet(alpha=self.alpha,
                            n_splits=0,
                            fit_intercept=False,
                            standardize=False,
                            lambda_path=lambda_path)
        solver.fit(X, y.squeeze())

        # Drop the sacrificial first lambda and its fitted vector; the
        # coefficient axes are swapped so that lambda is the first axis.
        lambdas = np.array(solver.lambda_path_).flatten()[1:]
        coefs = np.array(np.swapaxes(solver.coef_path_, 0, 1))[1:, :]

        # Record the path settings in the executor's history.
        history = self.executor.history
        history['lambdas'] = lambdas
        history['alpha'] = self.alpha

        # Load each fitted vector into the model and compute the metrics
        # registered to self.executor.
        for fitted_row in coefs:
            weights = torch.tensor(fitted_row,
                                   dtype=torch.float32,
                                   device=self.device)
            self.model.set_w(weights)
            self.trainer.fire_event(_iteration_level_event)
Ejemplo n.º 29
0
def FeatureSelection(df_x, xtrain, ytrain, exclude_cols=None):
    """Select features whose cross-validated lasso coefficient is non-zero.

    Parameters
    ----------
    df_x : DataFrame
        Frame whose columns are indexed by the positions of the selected
        coefficients to obtain their names.
    xtrain, ytrain :
        Training predictors and response passed to ``ElasticNet.fit``.
    exclude_cols :
        Accepted for interface compatibility; not used by this function.
        The default is ``None`` rather than a shared mutable list.

    Returns
    -------
    tuple
        ``(names, indices)``: the selected column names as a list and the
        positions of the non-zero coefficients.
    """
    # alpha=1 is the pure lasso penalty.
    lasso = ElasticNet(alpha=1, n_splits=10, random_state=123, n_jobs=4)
    lasso.fit(xtrain, ytrain)
    coeffs = lasso.coef_
    important_x_dx_lasso = np.where(coeffs != 0.)[0]
    important_x_lasso = list(df_x.columns[important_x_dx_lasso])
    return (important_x_lasso, important_x_dx_lasso)
Ejemplo n.º 30
0
def glmnet_box():
    """Fit ridge models for both Box-Cox labels, print R^2, plot residuals.

    The same ``ElasticNet(alpha=0)`` instance is fitted first on the
    latitude label and then refitted on the longitude label. Predictions are
    passed through ``inverse_box_cox`` before plotting against the
    untransformed labels.
    """
    ridge = ElasticNet(n_splits=20, scoring='r2', alpha=0)

    # Latitude.
    ridge.fit(music_features, box_latitude_label)
    lat_r_squared = ridge.score(music_features, box_latitude_label)
    print('GLMNET ridge lattitude r2 {}'.format(lat_r_squared))
    latitude_fitted = inverse_box_cox(ridge.predict(music_features),
                                      lambda_lat, 90)
    plot_predictions(latitude_fitted, latitude_label,
                     'ridge_latitude_residual.png',
                     'residual vs fitted latitude for Ridge')

    # Longitude.
    ridge.fit(music_features, box_longitude_label)
    lon_r_squared = ridge.score(music_features, box_longitude_label)
    print('GLMNET ridge longitude r2 {}'.format(lon_r_squared))
    longitude_fitted = inverse_box_cox(ridge.predict(music_features),
                                       lambda_lon, 180)
    plot_predictions(longitude_fitted, longitude_label,
                     'ridge_longitude_residual.png',
                     'residual vs fitted longitude for Ridge regression')