Example #1
    def init_reg_params(self, X, y):

        # For posterity: we select lambda2 by applying the LOOCV formula and
        # explicitly optimizing this expression rather than using Ridge's CV,
        # because of memory considerations for large problems (RidgeCV tries
        # to create a large array over all samples and all choices of
        # regularization parameter).

        # However, before applying to Elastic Net, it is important that we
        # divide this estimate by 2 * n_samples to appropriately re-balance
        # the terms.

        if self.lambda2 is None:
            l2 = minimize_scalar(lambda l2 : LOOCV(X, y, l2)).x
            self.lambda2 = l2/(2 * y.size)

        self.dummy_path = False

        if self.lambda1 is None:
            self.lambda1 = _alpha_grid(X, y, n_alphas = 100)
        else:
            if np.isscalar(self.lambda1):
                self.lambda1 = np.array([self.lambda1])
            lambda1 = np.flipud(np.sort(self.lambda1))
            if lambda1.size < 3:
                # Create a dummy path for the path solver, preserving the
                # descending order established above
                self.dummy_path = True
                self.pathlength = lambda1.size
                while lambda1.size < 3:
                    lambda1 = np.append(lambda1, lambda1[-1]/2)
            self.lambda1 = lambda1
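
For reference, here is a minimal sketch of the closed-form ridge LOOCV objective that a helper like `LOOCV(X, y, l2)` above presumably evaluates; the helper itself is not shown in this example, so the exact scaling conventions are an assumption:

import numpy as np

def LOOCV(X, y, lam):
    # Closed-form leave-one-out CV error for ridge regression.
    # Uses e_i = (y_i - yhat_i) / (1 - H_ii), where
    # H = X (X^T X + lam * I)^-1 X^T is the ridge hat matrix,
    # so no explicit per-sample refitting is needed.
    n_features = X.shape[1]
    H = X @ np.linalg.solve(X.T @ X + lam * np.eye(n_features), X.T)
    loo_residuals = (y - H @ y) / (1.0 - np.diag(H))
    return np.mean(loo_residuals ** 2)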
Example #2
def test_pyclasso():
    """Tests whether the PycLasso class is working"""

    pyclasso = PycLasso(fit_intercept=False, max_iter=1000)

    # Test that we can set params correctly
    pyclasso.set_params(fit_intercept=True)
    assert (pyclasso.fit_intercept)
    pyclasso.set_params(max_iter=500)
    assert (pyclasso.max_iter == 500)
    pyclasso.set_params(alphas=np.arange(100))
    assert (np.array_equal(pyclasso.alphas, np.arange(100)))

    # Test that spurious parameters are rejected
    try:
        pyclasso.set_params(blah=5)
        raise AssertionError('No exception thrown!')
    except ValueError:
        pass

    # Tests against a toy problem
    X = np.array([[-1, 2, 3], [4, 1, -7], [1, 3, 1], [4, 3, 12], [8, 11, 2]],
                 dtype=float)
    beta = np.array([1, 4, 2], dtype=float)
    y = np.dot(X, beta)

    alphas = _alpha_grid(X, y)
    pyclasso.set_params(alphas=alphas, fit_intercept=False)
    pyclasso.fit(X, y)
    assert (np.array_equal(pyclasso.coef_.shape, (100, 3)))
    y_pred = pyclasso.predict(X)
    scores = np.array([r2_score(y, y_pred[:, j]) for j in range(100)])
    assert (np.allclose(1, max(scores)))
Example #3
 def get_reg_params(self, X, y):
     alphas = _alpha_grid(X=X,
                          y=y,
                          l1_ratio=1.0,
                          fit_intercept=self.fit_intercept,
                          eps=self.eps,
                          n_alphas=self.n_lambdas)
     return [{'alpha': a} for a in alphas]
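
For context, a rough sketch of what sklearn's private `_alpha_grid` computes in the centered, unnormalized case (a log-spaced, descending grid); this mirrors its documented behavior but simplifies away the Xy precomputation and sparse handling:

import numpy as np

def alpha_grid_sketch(X, y, l1_ratio=1.0, eps=1e-3, n_alphas=100):
    # alpha_max is the smallest alpha at which all coefficients are zero
    n_samples = X.shape[0]
    alpha_max = np.max(np.abs(X.T @ y)) / (n_samples * l1_ratio)
    # geometric grid from alpha_max down to eps * alpha_max, descending
    return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max),
                       num=n_alphas)[::-1]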
Example #4
def test_space_net_alpha_grid_same_as_sk():
    try:
        from sklearn.linear_model.coordinate_descent import _alpha_grid
        iris = load_iris()
        X = iris.data
        y = iris.target
        np.testing.assert_almost_equal(
            _space_net_alpha_grid(X, y, n_alphas=5),
            X.shape[0] * _alpha_grid(X, y, n_alphas=5, fit_intercept=False))
    except ImportError:
        raise SkipTest
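
The factor of X.shape[0] in this test reflects a difference in loss conventions: sklearn averages the squared-error term, 1/(2 * n_samples) * ||y - Xb||^2, while space-net (per this test) uses the unaveraged 0.5 * ||y - Xb||^2, so its alpha_max is n_samples times larger. A sketch of the identity being asserted:

# sklearn:    alpha_max = max|X^T y| / n_samples   (fit_intercept=False, l1_ratio=1)
# space-net:  alpha_max = max|X^T y|
# hence: _space_net_alpha_grid(X, y) == X.shape[0] * _alpha_grid(X, y, fit_intercept=False)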
Example #6
    def get_reg_params(self, X, y):
        """Calculates the regularization parameters (alpha and lambda) to be
        used for the provided data.

        Note that the Elastic Net penalty is given by

                1 / (2 * n_samples) * ||y - Xb||^2_2
            + lambda * (alpha * |b|_1 + 0.5 * (1 - alpha) * |b|^2_2)

        where lambda and alpha are regularization parameters.

        Scikit-learn does not use these names. Instead, scikit-learn
        denotes alpha by 'l1_ratio' and lambda by 'alpha'.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The design matrix.

        y : array-like, shape (n_samples)
            The response vector.

        Returns
        -------
        reg_params : a list of dictionaries
            A list containing dictionaries with the value of each
            (lambda, alpha) describing the type of regularization to impose.
            The keys adhere to scikit-learn's terminology (lambda->alpha,
            alpha->l1_ratio). This allows easy passing into the ElasticNet
            object.
        """
        if self.lambdas is None:
            self.lambdas = np.zeros((self.n_alphas, self.n_lambdas))
            # a set of lambdas is generated for each alpha value (l1_ratio in
            # scikit-learn parlance)
            for alpha_idx, alpha in enumerate(self.alphas):
                self.lambdas[alpha_idx, :] = _alpha_grid(
                    X=X,
                    y=y,
                    l1_ratio=alpha,
                    fit_intercept=self.fit_intercept,
                    eps=self.eps,
                    n_alphas=self.n_lambdas,
                    normalize=self.normalize)

        # place the regularization parameters into a list of dictionaries
        reg_params = list()
        for alpha_idx, alpha in enumerate(self.alphas):
            for lamb_idx, lamb in enumerate(self.lambdas[alpha_idx]):
                # store this (lambda, alpha) combination
                reg_params.append(dict(alpha=lamb, l1_ratio=alpha))

        return reg_params
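
A hypothetical usage sketch, assuming `model` is an instance of the surrounding class; each returned dictionary maps directly onto ElasticNet's keyword arguments (lambda -> alpha, alpha -> l1_ratio):

from sklearn.linear_model import ElasticNet

for params in model.get_reg_params(X, y):
    enet = ElasticNet(fit_intercept=False, **params)
    enet.fit(X, y)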
Example #7
def compute_weightedLASSO(lasso, XTrain_current, YTrain, XTest_current, YTest, scoring, score_f, verbose, values_TM):
    # values_TM is a matrix containing the t and m values for train and test
    alphas = _alpha_grid(XTrain_current, YTrain, fit_intercept=False)
    parameters = {"alpha": alphas}

    clf = GridSearchCV(lasso, parameters, fit_params = {"verbose" : False}, cv=3, scoring=scoring)
    clf.fit(XTrain_current, YTrain)
    lambda_opt = clf.best_params_

    print("best lambda", lambda_opt)

    lasso.set_params(**lambda_opt)
    lasso.fit(XTrain_current,YTrain)

    y_pred_train = lasso.predict(XTrain_current)
    mse_train = score_f(YTrain, y_pred_train)

    y_pred_test = lasso.predict(XTest_current)
    mse_test = score_f(YTest, y_pred_test)

    if verbose:
        print("mse_train "+lasso.__class__.__name__,mse_train)
        print ("mse_test weights "+lasso.__class__.__name__,mse_test)
        print("mae train",100*mean_absolute_error(YTrain,y_pred_train)/89.7)
        print("mae test",100*mean_absolute_error(YTest,y_pred_test)/89.7)

        print("mse train",100*np.sqrt(mean_squared_error(YTrain,y_pred_train))/89.7)
        print("mse test",100*np.sqrt(mean_squared_error(YTest,y_pred_test))/89.7)

    ##values[0] = 24, values[1] = 281, values[2] = 214
    if len(values_TM)!=0:
        abs_error_train = 100*mean_absolute_error(YTrain,y_pred_train)*len(YTrain)/(89.7 * values_TM[0, 0] * values_TM[0, 1])

        abs_error_test = 100*mean_absolute_error(YTest,y_pred_test)*len(YTest)/(89.7 * values_TM[1, 0] * values_TM[1,1])

        mse_error_train = 100.*np.sqrt(mean_squared_error(YTrain,y_pred_train)*len(YTrain)/(values_TM[0, 0] * values_TM[0, 1]))/(89.7)
        print("mean squared error train", mse_error_train )

        mse_error_test = 100.*np.sqrt(mean_squared_error(YTest,y_pred_test)*len(YTest)/(values_TM[1, 0] * values_TM[1, 1]))/(89.7)
        print("mean squared error test", mse_error_test )

        if verbose:
            print("abs test", abs_error_test)
            print("abs train", abs_error_train)

    return mse_test, lasso.beta
Example #8
 def get_reg_params(self, X, y):
     if self.lambdas is None:
         self.lambdas = np.zeros((self.n_alphas, self.n_lambdas))
         for alpha_idx, alpha in enumerate(self.alphas):
             self.lambdas[alpha_idx, :] = _alpha_grid(
                 X=X, y=y,
                 l1_ratio=alpha,
                 fit_intercept=self.fit_intercept,
                 eps=self.eps,
                 n_alphas=self.n_lambdas,
                 normalize=self.normalize
             )
     ret = list()
     for alpha_idx, alpha in enumerate(self.alphas):
         for lamb_idx, lamb in enumerate(self.lambdas[alpha_idx]):
             # store this (lambda, alpha) combination
             ret.append(dict(alpha=lamb, l1_ratio=alpha))
     return ret
Example #9
def admm_path(X,
              y,
              Xy=None,
              alphas=None,
              eps=1e-3,
              n_alphas=100,
              rho=1.0,
              max_iter=1000,
              tol=1e-04):
    _, n_features = X.shape
    multi_output = False
    n_iters = []

    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    if alphas is None:
        alphas = _alpha_grid(X,
                             y,
                             Xy=Xy,
                             l1_ratio=1.0,
                             eps=eps,
                             n_alphas=n_alphas)
    else:
        alphas = np.sort(alphas)[::-1]
        n_alphas = len(alphas)

    if not multi_output:
        coefs = np.zeros((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.zeros((n_features, n_outputs, n_alphas), dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        clf = LassoADMM(alpha=alpha, rho=rho, max_iter=max_iter, tol=tol)
        clf.fit(X, y)
        coefs[..., i] = clf.coef_
        n_iters.append(clf.n_iter_)

    return alphas, coefs, n_iters
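
A minimal usage sketch for admm_path on synthetic data, assuming the LassoADMM class used by this example is importable:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 10)
y = X @ rng.randn(10)

alphas, coefs, n_iters = admm_path(X, y, n_alphas=20)
print(coefs.shape)  # (n_features, n_alphas) == (10, 20)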
Example #10
    def run(self, X, y, args, selection_methods=['CV']):

        if hasattr(self, 'fitted_estimator'):
            del self.fitted_estimator

        self.n_alphas = args['n_alphas']
        self.cv_splits = 5

        # Draw alphas using the _alpha_grid method
        self.alphas = _alpha_grid(X, y.ravel(), n_alphas=self.n_alphas)

        # Container for results
        self.results = {
            selection_method: {}
            for selection_method in selection_methods
        }

        # For each selection method, obtain coefficients and selected
        # hyperparameter
        true_model = args['betas'].ravel()
        for selection_method in selection_methods:
            self.fit_and_select(X, y.ravel(), selection_method, true_model)

        return self.results
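
A hypothetical driver for the run() method above, assuming `selector` is an instance of the surrounding class and `beta_true` holds the generating coefficients:

args = {'n_alphas': 48, 'betas': beta_true}
results = selector.run(X, y, args, selection_methods=['CV'])
# results['CV'] is populated by fit_and_select (not shown in this example)
print(results['CV'])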
Example #11
def l1l2_regularization(
    X, y, max_iter=100000, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
    precompute='auto', Xy=None, copy_X=True, coef_init=None,
    verbose=False, return_n_iter=False, positive=False,
        tol=1e-5, check_input=True, **params):
    if check_input:
        X = check_array(X, 'csc', dtype=[np.float64, np.float32],
                        order='F', copy=copy_X)
        y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False,
                        ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False,
                             ensure_2d=False)

    _, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    from scipy import sparse
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)
    if alphas is None:
        # No need to normalize or fit an intercept: it has been done above
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps, n_alphas=n_alphas,
                             normalize=False, copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 1000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas),
                         dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * 2  # * n_samples
        l2_reg = alpha * (1.0 - l1_ratio)  # * n_samples
        if not multi_output and sparse.isspmatrix(X):
            # model = cd_fast.sparse_enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X.data, X.indices,
            #     X.indptr, y, X_sparse_scaling,
            #     max_iter, tol, rng, random, positive)
            raise NotImplementedError()
        elif multi_output:
            # model = cd_fast.enet_coordinate_descent_multi_task(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
            raise NotImplementedError('Multi output not implemented')
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when bypassing
            # checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            # model = cd_fast.enet_coordinate_descent_gram(
            #     coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
            #     tol, rng, random, positive)
            raise NotImplementedError()

        elif precompute is False:
            # model = cd_fast.enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
            #     positive)
            model = fista_l1l2(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
                positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)
        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        #if dual_gap_ > eps_:  # TODO evaluate the dual gap
        if n_iter_ >= max_iter:
            import warnings
            warnings.warn('Objective did not converge.' +
                          ' You might want' +
                          ' to increase the number of iterations.' +
                          ' Fitting data with very small alpha' +
                          ' may cause precision problems.',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                import sys
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
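
For orientation, a minimal FISTA sketch for the l1 + l2 objective that the fista_l1l2 call above presumably solves; this is an illustrative implementation with its own scaling conventions, not the function imported by this example:

import numpy as np

def fista_l1l2_sketch(X, y, l1_reg, l2_reg, max_iter=1000, tol=1e-5):
    # Minimizes 0.5 * ||y - Xw||^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||^2
    # via accelerated proximal gradient (FISTA).
    n_features = X.shape[1]
    w = np.zeros(n_features)
    z = w.copy()
    t = 1.0
    # Lipschitz constant of the smooth part's gradient
    L = np.linalg.norm(X, 2) ** 2 + l2_reg
    for _ in range(max_iter):
        grad = X.T @ (X @ z - y) + l2_reg * z
        w_new = z - grad / L
        # soft-thresholding: proximal operator of the l1 term
        w_new = np.sign(w_new) * np.maximum(np.abs(w_new) - l1_reg / L, 0.0)
        t_new = 0.5 * (1.0 + np.sqrt(1.0 + 4.0 * t * t))
        z = w_new + ((t - 1.0) / t_new) * (w_new - w)
        if np.max(np.abs(w_new - w)) < tol:
            w = w_new
            break
        w, t = w_new, t_new
    return w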
Example #13
    def fit(self,
            X,
            y,
            groups=None,
            seed=None,
            verbose=False,
            sample_weight=None,
            option=True):
        """Fit data according to the UoI-Lasso algorithm.
		Relevant information (fits, residuals, model performance) is stored within object.
		Thus, nothing is returned by this function.

		Parameters
		----------
		X : np array (2d)
			the design matrix, containing the predictors.
			its shape is assumed to be (number of samples, number of features).

		y : np array (1d)
			the vector of dependent variables.
			its length is assumed to be (number of samples,).

		seed : int
			a seed for the random number generator. this number is relevant
			for the choosing bootstraps and dividing the data into training and test sets.

		verbose : boolean
			a boolean switch indicating whether the fitting should print out its progress.
		"""
        # initialize the seed, if it's provided

        if seed is not None:
            np.random.seed(seed)

        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csr', 'csc', 'coo'],
                         y_numeric=True,
                         multi_output=True)

        # preprocess data through centering and normalization
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X)

        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        # extract model dimensions from design matrix
        self.n_samples_, self.n_features_ = X.shape
        # create or overwrite arrays to collect final results
        self.coef_ = np.zeros(self.n_features_, dtype=np.float32)

        # group leveling
        if groups is None:
            self.groups_ = np.ones(self.n_samples_)
        else:
            self.groups_ = np.array(groups)

        if verbose:
            print('(1) Loaded data.\n %s samples with %s features.' %
                  (self.n_samples_, self.n_features_))

        self.lambdas = _alpha_grid(X=X,
                                   y=y,
                                   l1_ratio=1.0,
                                   fit_intercept=self.fit_intercept,
                                   eps=1e-3,
                                   n_alphas=self.n_lambdas,
                                   normalize=self.normalize)

        # sweep over the grid of regularization strengths
        estimates_selection, _ = self.lasso_sweep(
            X, y, self.lambdas, self.train_frac_sel, self.n_boots_sel,
            self.use_admm, desc='fine lasso sweep', verbose=verbose)

        # perform the intersection step
        self.intersection(estimates_selection)

        ########################
        ### Model Estimation ###
        ########################
        # we'll use the supports obtained in the selection module to calculate
        # bagged OLS estimates over bootstraps

        if verbose:
            print('(3) Beginning model estimation, with %s bootstraps.' %
                  self.n_boots_est)

        # compute number of samples per bootstrap
        n_samples_bootstrap = int(round(self.train_frac_est * self.n_samples_))

        # set up data arrays
        estimates = np.zeros(
            (self.n_boots_est, self.n_lambdas, self.n_features_),
            dtype=np.float32)
        scores = np.zeros((self.n_boots_est, self.n_lambdas), dtype=np.float32)

        # iterate over bootstrap samples
        for bootstrap in trange(self.n_boots_est,
                                desc='Model Estimation',
                                disable=not verbose):

            # extract the bootstrap indices, keeping a fraction of the data available for testing
            train_idx, test_idx = utils.leveled_randomized_ids(
                self.groups_, self.train_frac_est)

            # iterate over the regularization parameters
            for lamb_idx, lamb in enumerate(self.lambdas):
                # extract current support set
                support = self.supports_[lamb_idx]

                # extract response vectors
                y_train = y[train_idx]
                y_test = y[test_idx]

                # if nothing was selected, we won't bother running OLS
                if np.any(support):
                    # get design matrices
                    X_train = X[train_idx][:, support]
                    X_test = X[test_idx][:, support]

                    # compute ols estimate
                    ols = lm.LinearRegression()
                    ols.fit(X_train, y_train)

                    # store the fitted coefficients
                    estimates[bootstrap, lamb_idx, support] = ols.coef_

                    # calculate estimation score
                    if self.estimation_score == 'r2':
                        scores[bootstrap, lamb_idx] = ols.score(X_test, y_test)
                    elif self.estimation_score == 'BIC':
                        y_pred = ols.predict(X_test)
                        n_features = np.count_nonzero(support)
                        scores[bootstrap,
                               lamb_idx] = -utils.BIC(y_true=y_test,
                                                      y_pred=y_pred,
                                                      n_features=n_features)
                    elif self.estimation_score == 'AIC':
                        y_pred = ols.predict(X_test)
                        n_features = np.count_nonzero(support)
                        scores[bootstrap,
                               lamb_idx] = -utils.AIC(y_true=y_test,
                                                      y_pred=y_pred,
                                                      n_features=n_features)
                    elif self.estimation_score == 'AICc':
                        y_pred = ols.predict(X_test)
                        n_features = np.count_nonzero(support)
                        scores[bootstrap,
                               lamb_idx] = -utils.AICc(y_true=y_test,
                                                       y_pred=y_pred,
                                                       n_features=n_features)
                    else:
                        raise ValueError(
                            str(self.estimation_score) +
                            ' is not a valid option.')
                else:
                    if self.estimation_score == 'r2':
                        scores[bootstrap, lamb_idx] = r2_score(
                            y_true=y_test, y_pred=np.zeros(y_test.size))
                    elif self.estimation_score == 'BIC':
                        n_features = 0
                        scores[bootstrap, lamb_idx] = -utils.BIC(
                            y_true=y_test,
                            y_pred=np.zeros(y_test.size),
                            n_features=n_features)
                    elif self.estimation_score == 'AIC':
                        n_features = 0
                        scores[bootstrap, lamb_idx] = -utils.AIC(
                            y_true=y_test,
                            y_pred=np.zeros(y_test.size),
                            n_features=n_features)
                    elif self.estimation_score == 'AICc':
                        n_features = 0
                        scores[bootstrap, lamb_idx] = -utils.AICc(
                            y_true=y_test,
                            y_pred=np.zeros(y_test.size),
                            n_features=n_features)
                    else:
                        raise ValueError(
                            str(self.estimation_score) +
                            ' is not a valid option.')

        if verbose:
            print('(4) Bagging estimates, using bagging option %s.' %
                  self.bagging_options)

        # bagging option 1:
        # for each bootstrap sample, find the regularization parameter that
        # gave the best results
        if self.bagging_options == 1:
            self.lambda_max_idx = np.argmax(scores, axis=1)
            # extract the estimates over bootstraps from the model with best lambda
            best_estimates = estimates[np.arange(self.n_boots_est),
                                       self.lambda_max_idx, :]
            # take the median across estimates for the final, bagged estimate
            self.coef_ = np.median(best_estimates, axis=0)

        # bagging option 2:
        # average estimates across bootstraps, and then find the
        # regularization parameter that gives the best results
        elif self.bagging_options == 2:
            mean_scores = np.mean(scores, axis=0)
            self.lambda_max_idx = np.argmax(mean_scores)
            self.coef_ = np.median(estimates[:, self.lambda_max_idx, :], 0)

        else:
            raise ValueError('Bagging option %d is not available.' %
                             self.bagging_options)

        if verbose:
            print("---> UoI Lasso complete.")

        self._set_intercept(X_offset, y_offset, X_scale)

        return self
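
A toy illustration of the two bagging options above, using a small scores/estimates pair whose shapes follow the code, (n_boots, n_lambdas) and (n_boots, n_lambdas, n_features):

import numpy as np

scores = np.array([[0.2, 0.9], [0.8, 0.3]])         # (2 boots, 2 lambdas)
estimates = np.arange(2 * 2 * 3.).reshape(2, 2, 3)  # (2, 2, 3 features)

# option 1: best lambda per bootstrap, then median across bootstraps
idx1 = np.argmax(scores, axis=1)                    # [1, 0]
coef1 = np.median(estimates[np.arange(2), idx1, :], axis=0)

# option 2: best lambda on the bootstrap-averaged score, then median
idx2 = np.argmax(np.mean(scores, axis=0))           # 0
coef2 = np.median(estimates[:, idx2, :], axis=0)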
# y = np.fromfile(f, dtype=np.int32, sep=' ')

random_time4 = []
random_time8 = []
cyclic_time4 = []
cyclic_time8 = []
random_iter4 = []
random_iter8 = []
cyclic_iter4 = []
cyclic_iter8 = []
random_score4 = []
random_score8 = []
cyclic_score4 = []
cyclic_score8 = []

alphas = _alpha_grid(X, y, n_alphas=20)
for alpha in alphas:

    r_time4, r_iter4, r_score4, r_time8, r_iter8, r_score8 = 0, 0, 0, 0, 0, 0
    c_time4, c_iter4, c_score4, c_time8, c_iter8, c_score8 = 0, 0, 0, 0, 0, 0

    for n_iter in [0, 1, 2]:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=n_iter)

        clf = ElasticNet(max_iter=500000, alpha=alpha, tol=1e-4)
        print("......") + str(alpha)
        t = time()
        clf.fit(X_train, y_train)
        c_time4 += time() - t
        y_pred = np.sign(clf.predict(X_test))
        c_iter4 += clf.n_iter_