def path_calc(X, y, X_holdout, y_holdout, alphas, paramgrid, colname='CV',
              yname='', method='Elastic Net'):
    # make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # this code is adapted from the sklearn ElasticNet fit function, which
    # unfortunately doesn't accept multiple alphas at once
    X, y = check_X_y(X, y, accept_sparse='csc',
                     order='F', dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True, y_numeric=True)
    y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                    ensure_2d=False)

    # this is the step that provides the data needed to find the intercept
    # if fit_intercept is True
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(
        X, y, None, precompute, normalize, fit_intercept, copy=False)

    y = np.squeeze(y)

    # do the path calculation, and report how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    elif method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    # create some empty arrays to store the results
    y_pred_holdouts = np.empty(shape=(len(alphas), len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []

    for j in range(len(path_alphas)):
        coef_temp = path_coefs[:, j]
        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        y_pred_holdouts[j, :] = np.dot(X_holdout, path_coefs[:, j]) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j, :], y_holdout)
        cvcols.append(('predict',
                       '"' + method + ' - ' + yname + ' - ' + colname +
                       ' - Alpha:' + str(path_alphas[j]) + ' - ' +
                       str(paramgrid) + '"'))

    return (path_alphas, path_coefs, intercepts, path_iters,
            y_pred_holdouts, rmses, cvcols)
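# A minimal usage sketch for path_calc above, on hypothetical data. It assumes
# the module-level imports path_calc relies on (numpy, copy, time, sklearn's
# check_X_y/check_array/_pre_fit/enet_path/lasso_path, and an RMSE helper) are
# in scope. Note that paramgrid must contain 'fit_intercept', 'precompute' and
# 'copy_X', since path_calc pops them off before forwarding the rest to
# enet_path.
import numpy as np

rng = np.random.RandomState(0)
X_train = rng.randn(50, 20)
y_train = X_train[:, 0] + 0.1 * rng.randn(50)
X_hold = rng.randn(20, 20)
y_hold = X_hold[:, 0] + 0.1 * rng.randn(20)

alphas = np.logspace(-3, 0, 10)
paramgrid = {'fit_intercept': True, 'precompute': False,
             'copy_X': True, 'l1_ratio': 0.5}

(path_alphas, path_coefs, intercepts, path_iters,
 y_pred_holdouts, rmses, cvcols) = path_calc(
    X_train, y_train, X_hold, y_hold, alphas, paramgrid,
    yname='example', method='Elastic Net')

# pick the alpha with the smallest holdout RMSE
print('best alpha:', path_alphas[np.argmin(rmses)])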
def fit(self, X, y, M=None):
    """Fit the model using X, y as training data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.
    y : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    coeff = stpforward(y, X, M)

    X, y, X_mean, y_mean, X_std, Gram, Xy = _pre_fit(
        X, y, None, self.precompute, self.normalize,
        self.fit_intercept, copy=True)

    self.coef_ = coeff  # MODIFY HERE !!!
    return self
def fit(self, X, y, *args, **kwargs):
    X = np.asanyarray(X)
    y = np.asanyarray(y)

    # Centering data
    X, y, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X, y, None, self.precompute, self.normalize,
                 self.fit_intercept, copy=False)

    # Calling the class-specific train method
    self._fit(X, y, *args, **kwargs)

    # Fitting the intercept if required
    self._set_intercept(X_offset, y_offset, X_scale)

    self._trained = True
    return self
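# Several functions in this file recover the intercept from the centering
# statistics returned by _pre_fit (explicitly in path_calc above, implicitly
# via _set_intercept here). A minimal, self-contained sketch of that identity,
# on hypothetical noiseless data: fit on centered X and y, then map back with
# intercept = y_offset - X_offset @ coef.
import numpy as np

X = np.random.RandomState(1).randn(30, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 3.0

X_offset, y_offset = X.mean(axis=0), y.mean()
Xc, yc = X - X_offset, y - y_offset

# least squares on the centered problem recovers the slope exactly here
coef, *_ = np.linalg.lstsq(Xc, yc, rcond=None)
intercept = y_offset - X_offset @ coef

assert np.allclose(X @ coef + intercept, y)  # intercept == 3.0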
def fit(self, X, y, check_input=True):
    """Fit model with coordinate descent.

    Parameters
    ----------
    X : ndarray or scipy.sparse matrix, (n_samples, n_features)
        Data

    y : ndarray, shape (n_samples,) or (n_samples, n_targets)
        Target

    check_input : boolean, (default=True)
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.

    Notes
    -----
    Coordinate descent is an algorithm that considers each column of
    data at a time hence it will automatically convert the X input
    as a Fortran-contiguous numpy array if necessary.

    To avoid memory re-allocation it is advised to allocate the
    initial data in memory directly using that format.
    """
    if self.alpha == 0:
        warnings.warn("With alpha=0, this algorithm does not converge "
                      "well. You are advised to use the LinearRegression "
                      "estimator", stacklevel=2)

    if isinstance(self.precompute, six.string_types):
        raise ValueError('precompute should be one of True, False or'
                         ' array-like. Got %r' % self.precompute)

    # We expect X and y to be float64 or float32 Fortran ordered arrays
    # when bypassing checks
    if check_input:
        X, y = check_X_y(X, y, accept_sparse='csc',
                         order='F', dtype=[np.float64, np.float32],
                         copy=self.copy_X and self.fit_intercept,
                         multi_output=True, y_numeric=True)
        y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                        ensure_2d=False)

    X, y, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X, y, None, self.precompute, self.normalize,
                 self.fit_intercept, copy=False)

    if y.ndim == 1:
        y = y[:, None]
    if Xy is not None and Xy.ndim == 1:
        Xy = Xy[:, None]

    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    if self.selection not in ['cyclic', 'random']:
        raise ValueError("selection should be either random or cyclic.")

    if not self.warm_start or self.coef_ is None:
        coef_ = np.zeros((n_targets, n_features), dtype=X.dtype,
                         order='F')
    else:
        coef_ = self.coef_
        if coef_.ndim == 1:
            coef_ = coef_[None, :]

    dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)
    self.n_iter_ = []
    history = []
    for k in xrange(n_targets):
        if self.mode == 'admm':
            this_coef, hist, this_iter = \
                group_lasso_overlap(
                    X, y[:, k], lamda=self.alpha, groups=self.groups,
                    rho=self.rho, max_iter=self.max_iter, tol=self.tol,
                    verbose=self.verbose, rtol=self.rtol)
        else:  # paspal wrapper
            this_coef, hist, this_iter = \
                group_lasso_overlap_paspal(
                    X, y[:, k], lamda=self.alpha, groups=self.groups,
                    rho=self.rho, max_iter=self.max_iter, tol=self.tol,
                    verbose=self.verbose, rtol=self.rtol,
                    matlab_engine=self.matlab_engine)
        coef_[k] = this_coef.ravel()
        history.append(hist)
        self.n_iter_.append(this_iter)

    if n_targets == 1:
        self.n_iter_ = self.n_iter_[0]

    self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
    self._set_intercept(X_offset, y_offset, X_scale)

    # workaround since _set_intercept will cast self.coef_ into float64
    self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
    self.history_ = history

    # return self for chaining fit and predict calls
    return self
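# Hypothetical usage of the overlapping group-lasso fit above. The class name
# GroupLassoOverlap and its constructor are illustrative placeholders; only
# the attributes fit actually reads (alpha, groups, rho, mode, max_iter, tol,
# rtol, verbose, warm_start, coef_, matlab_engine, selection) are known from
# the code, and the exact 'groups' format expected by group_lasso_overlap is
# an assumption here.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 6)
y = X[:, :3].sum(axis=1) + 0.1 * rng.randn(50)

# overlapping feature groups (feature 2 and feature 4 belong to two groups)
groups = [[0, 1, 2], [2, 3, 4], [4, 5]]

est = GroupLassoOverlap(alpha=0.1, groups=groups, rho=1.0, mode='admm')
est.fit(X, y)  # mode='admm' avoids the paspal/MATLAB wrapper
print(est.coef_, est.intercept_)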
def l1l2_regularization(
        X, y, max_iter=100000, l1_ratio=0.5, eps=1e-3, n_alphas=100,
        alphas=None, precompute='auto', Xy=None, copy_X=True,
        coef_init=None, verbose=False, return_n_iter=False,
        positive=False, tol=1e-5, check_input=True, **params):
    if check_input:
        X = check_array(X, 'csc', dtype=[np.float64, np.float32],
                        order='F', copy=copy_X)
        y = check_array(y, 'csc', dtype=X.dtype.type, order='F',
                        copy=False, ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=X.dtype.type, order='C',
                             copy=False, ensure_2d=False)

    _, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    from scipy import sparse
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)

    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps,
                             n_alphas=n_alphas, normalize=False,
                             copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    # note: the tol and max_iter signature arguments are shadowed here by
    # the values read from **params
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 1000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas),
                         dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * 2  # * n_samples
        l2_reg = alpha * (1.0 - l1_ratio)  # * n_samples
        if not multi_output and sparse.isspmatrix(X):
            # model = cd_fast.sparse_enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X.data, X.indices,
            #     X.indptr, y, X_sparse_scaling,
            #     max_iter, tol, rng, random, positive)
            raise NotImplementedError()
        elif multi_output:
            # model = cd_fast.enet_coordinate_descent_multi_task(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
            raise NotImplementedError('Multi output not implemented')
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when
            # bypassing checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            # model = cd_fast.enet_coordinate_descent_gram(
            #     coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
            #     tol, rng, random, positive)
            raise NotImplementedError()
        elif precompute is False:
            # model = cd_fast.enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
            #     positive)
            model = fista_l1l2(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
                positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)

        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)

        # if dual_gap_ > eps_:  # TODO evaluate the dual gap
        if n_iter_ >= max_iter:
            import warnings
            warnings.warn('Objective did not converge. You might want '
                          'to increase the number of iterations. '
                          'Fitting data with very small alpha '
                          'may cause precision problems.',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                import sys
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
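# Hypothetical call pattern for l1l2_regularization above. It assumes the
# surrounding module defines fista_l1l2 and imports the sklearn helpers it
# uses (check_array, _pre_fit, _alpha_grid, check_random_state,
# ConvergenceWarning). Only the dense, single-output, precompute=False branch
# is implemented; the sparse, multi-output, and Gram branches raise
# NotImplementedError.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(40, 10)
y = X[:, 0] - X[:, 1] + 0.05 * rng.randn(40)

alphas, coefs, gaps, iters = l1l2_regularization(
    X, y, l1_ratio=0.5, n_alphas=20, precompute=False, return_n_iter=True)

print(coefs.shape)  # (n_features, n_alphas), one column per alpha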
def enet_path_adaptive(X, y, mask, l1_ratio=0.5, eps=1e-3, n_alphas=100,
                       alphas=None, precompute='auto', Xy=None,
                       copy_X=True, coef_init=None, verbose=False,
                       return_n_iter=False, positive=False,
                       check_input=True, **params):
    """Compute elastic net path with coordinate descent

    The elastic net optimization function varies for mono and multi-outputs.

    For mono-output tasks it is::

        1 / (2 * n_samples) * ||y - Xw||^2_2
        + alpha * l1_ratio * ||w||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2

    For multi-output tasks it is::

        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2
        + alpha * l1_ratio * ||W||_21
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2

    Where::

        ||W||_21 = \sum_i \sqrt{\sum_j w_{ij}^2}

    i.e. the sum of norm of each row.

    Read more in the :ref:`User Guide <elastic_net>`.

    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        Training data. Pass directly as Fortran-contiguous data to avoid
        unnecessary memory duplication. If ``y`` is mono-output then ``X``
        can be sparse.

    y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
        Target values

    mask : array-like
        Weight vector folded into the l1 penalty, one entry per sample;
        zero-padded to length n_samples when shorter.

    l1_ratio : float, optional
        float between 0 and 1 passed to elastic net (scaling between
        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso

    eps : float
        Length of the path. ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``

    n_alphas : int, optional
        Number of alphas along the regularization path

    alphas : ndarray, optional
        List of alphas where to compute the models.
        If None alphas are set automatically

    precompute : True | False | 'auto' | array-like
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.

    Xy : array-like, optional
        Xy = np.dot(X.T, y) that can be precomputed. It is useful
        only when the Gram matrix is precomputed.

    copy_X : boolean, optional, default True
        If ``True``, X will be copied; else, it may be overwritten.

    coef_init : array, shape (n_features, ) | None
        The initial values of the coefficients.

    verbose : bool or integer
        Amount of verbosity.

    params : kwargs
        keyword arguments passed to the coordinate descent solver.

    return_n_iter : bool
        whether to return the number of iterations or not.

    positive : bool, default False
        If set to True, forces coefficients to be positive.

    check_input : bool, default True
        Skip input validation checks, including the Gram matrix when
        provided, assuming they are handled by the caller when
        check_input=False.

    Returns
    -------
    alphas : array, shape (n_alphas,)
        The alphas along the path where models are computed.

    coefs : array, shape (n_features, n_alphas) or \
            (n_outputs, n_features, n_alphas)
        Coefficients along the path.

    dual_gaps : array, shape (n_alphas,)
        The dual gaps at the end of the optimization for each alpha.

    n_iters : array-like, shape (n_alphas,)
        The number of iterations taken by the coordinate descent optimizer
        to reach the specified tolerance for each alpha.
        (Is returned when ``return_n_iter`` is set to True).

    Notes
    -----
    See examples/linear_model/plot_lasso_coordinate_descent_path.py
    for an example.

    See also
    --------
    MultiTaskElasticNet
    MultiTaskElasticNetCV
    ElasticNet
    ElasticNetCV
    """
    # We expect X and y to be already Fortran ordered when bypassing
    # checks
    if check_input:
        X = check_array(X, 'csc', dtype=[np.float64, np.float32],
                        order='F', copy=copy_X)
        y = check_array(y, 'csc', dtype=X.dtype.type, order='F',
                        copy=False, ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=X.dtype.type, order='C',
                             copy=False, ensure_2d=False)

    n_samples, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)

    # zero-pad the weight vector if it is shorter than n_samples
    if len(mask) != n_samples:
        tmp_alpha = np.zeros(n_samples)
        tmp_alpha[np.arange(len(mask))] = mask
        mask = tmp_alpha

    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps,
                             n_alphas=n_alphas, normalize=False,
                             copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 1000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas),
                         dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        # the l1 penalty is a vector now, weighted by mask
        l1_reg = alpha * l1_ratio * n_samples * mask
        l2_reg = alpha * (1.0 - l1_ratio) * n_samples
        if not multi_output and sparse.isspmatrix(X):
            model = cd_fast_adaptive.sparse_enet_coordinate_descent_adaptive(
                coef_, l1_reg, l2_reg, X.data, X.indices,
                X.indptr, y, X_sparse_scaling,
                max_iter, tol, rng, random, positive)
        elif multi_output:
            l1_reg_scalar = alpha * l1_ratio * n_samples
            model = cd_fast_adaptive.enet_coordinate_descent_multi_task(
                coef_, l1_reg_scalar, l2_reg, X, y, max_iter, tol, rng,
                random)
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when
            # bypassing checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            model = cd_fast_adaptive.enet_coordinate_descent_gram_adaptive(
                coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
                tol, rng, random, positive)
        elif precompute is False:
            model = cd_fast_adaptive.enet_coordinate_descent_adaptive(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
                positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)

        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        if dual_gap_ > eps_:
            warnings.warn('Objective did not converge. You might want '
                          'to increase the number of iterations. '
                          'Fitting data with very small alpha '
                          'may cause precision problems.',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
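# Call-pattern sketch for enet_path_adaptive above. It assumes the companion
# cd_fast_adaptive extension module is built and importable; the semantics of
# the per-coordinate l1 weights are defined by that solver, so this only
# demonstrates the interface. With a uniform mask the weighted penalty
# presumably reduces to the standard elastic net penalty.
import numpy as np

rng = np.random.RandomState(0)
X = np.asfortranarray(rng.randn(25, 8))
y = X[:, 0] + 0.1 * rng.randn(25)

mask = np.ones(25)  # one weight per sample; shorter vectors are zero-padded

# precompute=False selects the plain (non-Gram) adaptive CD branch
alphas, coefs, gaps = enet_path_adaptive(
    X, y, mask, l1_ratio=0.7, n_alphas=15, precompute=False)
print(coefs.shape)  # (n_features, n_alphas)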
def fit(self, X, y, check_input=True):
    if self.alpha == 0:
        warnings.warn("With alpha=0, this algorithm does not converge "
                      "well. You are advised to use the LinearRegression "
                      "estimator", stacklevel=2)

    if (isinstance(self.precompute, six.string_types) and
            self.precompute == 'auto'):
        warnings.warn("Setting precompute to 'auto', was found to be "
                      "slower even when n_samples > n_features. Hence "
                      "it will be removed in 0.18.",
                      DeprecationWarning, stacklevel=2)

    if not (self.eta > 0 and self.eta < 1):
        self.eta = 0.5
        warnings.warn("Value given for eta is invalid. It must satisfy "
                      "the constraint 0 < eta < 1. Setting eta to the "
                      "default value (0.5).", stacklevel=2)

    if not (self.init_step > 0):
        self.init_step = 10
        warnings.warn("Value given for init_step is invalid. It must be "
                      "a positive number. Setting init_step to the "
                      "default value (10).", stacklevel=2)

    if check_input:
        # Ensure that X and y are float64 Fortran ordered arrays.
        # Also check for consistency in the dimensions, and that y doesn't
        # contain np.nan or np.inf entries.
        y = np.asarray(y, dtype=np.float64)
        X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64,
                         order='F',
                         copy=self.copy_X and self.fit_intercept,
                         multi_output=True, y_numeric=True)
        y = check_array(y, dtype=np.float64, order='F', copy=False,
                        ensure_2d=False)

    # Centre and normalise the data
    X, y, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X, y, None, self.precompute, self.normalize,
                 self.fit_intercept, copy=False)

    if y.ndim == 1:
        y = y[:, np.newaxis]
    if Xy is not None and Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]

    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    if not self.warm_start or self.coef_ is None:
        # Initial guess for coef_ is zero
        coef_ = np.zeros((n_targets, n_features), dtype=np.float64,
                         order='F')
    else:
        # Use previous value of coef_ as initial guess
        coef_ = self.coef_
        if coef_.ndim == 1:
            coef_ = coef_[np.newaxis, :]

    dual_gaps_ = np.zeros(n_targets, dtype=np.float64)

    self.n_iter_ = []

    # Perform the optimisation
    for k in xrange(n_targets):
        if Xy is not None:
            this_Xy = Xy[:, k]
        else:
            this_Xy = None
        _, this_coef, this_dual_gap, this_iter = \
            self.path(X, y[:, k],
                      l1_ratio=self.l1_ratio, eps=None,
                      eta=self.eta, init_step=self.init_step,
                      n_alphas=None, alphas=[self.alpha],
                      precompute=precompute, Xy=this_Xy,
                      fit_intercept=False,
                      adaptive_step=self.adaptive_step,
                      normalize=False, copy_X=True, verbose=False,
                      tol=self.tol, X_offset=X_offset, X_scale=X_scale,
                      return_n_iter=True, coef_init=coef_[k],
                      max_iter=self.max_iter, check_input=False)
        coef_[k] = this_coef[:, 0]
        dual_gaps_[k] = this_dual_gap[0]
        self.n_iter_.append(this_iter[0])

    if n_targets == 1:
        self.n_iter_ = self.n_iter_[0]

    self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
    self._set_intercept(X_offset, y_offset, X_scale)

    # return self for chaining fit and predict calls
    return self
def enet_path(X, y, l1_ratio=0.5, eps=1e-3, eta=0.5, init_step=10,
              adaptive_step=True, n_alphas=100, alphas=None,
              precompute='auto', Xy=None, copy_X=True, coef_init=None,
              verbose=False, return_n_iter=False, check_input=True,
              **params):
    """Compute elastic net path with coordinate descent

    The optimization function is::

        1 / (2 * n_samples) * ||y - Xw||^2_2
        + alpha * l1_ratio * ||w||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2

    Read more in the :ref:`User Guide <elastic_net>`.

    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        Training data. Pass directly as Fortran-contiguous data to avoid
        unnecessary memory duplication. If ``y`` is mono-output then ``X``
        can be sparse.

    y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
        Target values

    l1_ratio : float, optional
        float between 0 and 1 passed to elastic net (scaling between
        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso

    eps : float
        Length of the path. ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``

    n_alphas : int, optional
        Number of alphas along the regularization path

    alphas : ndarray, optional
        List of alphas where to compute the models.
        If None alphas are set automatically

    eta : float, optional
        Shrinkage parameter for backtracking line search. It must
        satisfy 0 < eta < 1.

    init_step : float, optional
        Initial step size used for the backtracking line search. It
        must be a positive number.

    adaptive_step : boolean, optional, default True
        Whether to calculate the optimal step size or use an adaptive
        step size chosen through a backtracking line search.

    precompute : True | False | 'auto' | array-like
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.

    Xy : array-like, optional
        Xy = np.dot(X.T, y) that can be precomputed. It is useful
        only when the Gram matrix is precomputed.

    copy_X : boolean, optional, default True
        If ``True``, X will be copied; else, it may be overwritten.

    coef_init : array, shape (n_features, ) | None
        The initial values of the coefficients.

    verbose : bool or integer
        Amount of verbosity.

    params : kwargs
        keyword arguments passed to the coordinate descent solver.

    return_n_iter : bool
        whether to return the number of iterations or not.

    check_input : bool, default True
        Skip input validation checks, including the Gram matrix when
        provided, assuming they are handled by the caller when
        check_input=False.

    Returns
    -------
    alphas : array, shape (n_alphas,)
        The alphas along the path where models are computed.

    coefs : array, shape (n_features, n_alphas) or \
            (n_outputs, n_features, n_alphas)
        Coefficients along the path.

    dual_gaps : array, shape (n_alphas,)
        The dual gaps at the end of the optimization for each alpha.

    n_iters : array-like, shape (n_alphas,)
        The number of iterations taken by the coordinate descent optimizer
        to reach the specified tolerance for each alpha.
        (Is returned when ``return_n_iter`` is set to True).

    See also
    --------
    ElasticNet
    ElasticNetCV
    """
    # Direct prox_fast to use fixed optimal step size by passing eta = 0
    # and init_step = 0 (which would otherwise be invalid)
    if not adaptive_step:
        eta_ = 0
        init_step_ = 0
    else:
        eta_ = eta
        init_step_ = init_step

    # We expect X and y to be already float64 Fortran ordered when
    # bypassing checks
    if check_input:
        X = check_array(X, 'csc', dtype=np.float64, order='F',
                        copy=copy_X)
        y = check_array(y, 'csc', dtype=np.float64, order='F',
                        copy=False, ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=np.float64, order='C', copy=False,
                             ensure_2d=False)

    n_samples, n_features = X.shape

    # MultiTaskElasticNet does not support sparse matrices
    if sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
        else:
            X_sparse_scaling = np.zeros(n_features)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)

    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps,
                             n_alphas=n_alphas, normalize=False,
                             copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    # note: tol and max_iter are read from **params here
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 10000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    coefs = np.empty((n_features, n_alphas), dtype=np.float64)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1]))
    else:
        coef_ = np.asfortranarray(coef_init)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * n_samples
        l2_reg = alpha * (1.0 - l1_ratio) * n_samples
        if sparse.isspmatrix(X):
            model = prox_fast.sparse_enet_prox_gradient(
                coef_, l1_reg, l2_reg, X.data, X.indices,
                X.indptr, y, X_sparse_scaling,
                max_iter, tol)
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when
            # bypassing checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            model = prox_fast.enet_prox_gradient_gram(
                coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
                eta_, init_step_, tol)
        elif precompute is False:
            model = prox_fast.enet_prox_gradient(
                coef_, l1_reg, l2_reg, X, y, max_iter, eta_, init_step_,
                tol)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like")

        coef_, dual_gap_, tol_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        if dual_gap_ > tol_:
            warnings.warn('Objective did not converge. You might want '
                          'to increase the number of iterations',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
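# Usage sketch for the proximal-gradient enet_path variant above (it assumes
# the prox_fast extension module is importable). With adaptive_step=True the
# backtracking line-search parameters eta and init_step are used; with
# adaptive_step=False the solver is directed to use a fixed step size instead.
import numpy as np

rng = np.random.RandomState(0)
X = np.asfortranarray(rng.randn(30, 12))
y = X[:, 0] + 0.1 * rng.randn(30)

# precompute=False selects the plain (non-Gram) proximal-gradient branch
alphas, coefs, gaps, iters = enet_path(
    X, y, l1_ratio=0.9, n_alphas=25, eta=0.5, init_step=10,
    adaptive_step=True, precompute=False, return_n_iter=True)

print(alphas[0], coefs.shape)  # alphas are returned in decreasing order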
def _path_residuals(X, y, train, test, path, path_params, alphas=None,
                    l1_ratio=1, X_order=None, dtype=None):
    """Returns the MSE for the models computed by 'path'

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values

    train : list of indices
        The indices of the train set

    test : list of indices
        The indices of the test set

    path : callable
        function returning a list of models on the path. See
        enet_path for an example of signature

    path_params : dictionary
        Parameters passed to the path function

    alphas : array-like, optional
        Array of float that is used for cross-validation. If not
        provided, computed using 'path'

    l1_ratio : float, optional
        float between 0 and 1 passed to ElasticNet (scaling between
        l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an
        L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For
        ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2

    X_order : {'F', 'C', or None}, optional
        The order of the arrays expected by the path function to
        avoid memory copies

    dtype : a numpy dtype or None
        The dtype of the arrays expected by the path function to
        avoid memory copies
    """
    X_train = X[train]
    y_train = y[train]
    X_test = X[test]
    y_test = y[test]
    fit_intercept = path_params['fit_intercept']
    normalize = path_params['normalize']

    if y.ndim == 1:
        precompute = path_params['precompute']
    else:
        # No Gram variant of multi-task exists right now.
        # Fall back to default enet_multitask
        precompute = False

    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X_train, y_train, None, precompute, normalize,
                 fit_intercept, copy=False)

    path_params = path_params.copy()
    path_params['Xy'] = Xy
    path_params['X_offset'] = X_offset
    path_params['X_scale'] = X_scale
    path_params['precompute'] = precompute
    path_params['copy_X'] = False
    path_params['alphas'] = alphas

    if 'l1_ratio' in path_params:
        path_params['l1_ratio'] = l1_ratio

    # Do the ordering and type casting here, as if it is done in the path,
    # X is copied and a reference is kept here
    X_train = check_array(X_train, 'csc', dtype=dtype, order=X_order)
    alphas, coefs, _ = path(X_train, y_train, **path_params)
    del X_train, y_train

    if y.ndim == 1:
        # Doing this so that it becomes coherent with multioutput.
        coefs = coefs[np.newaxis, :, :]
        y_offset = np.atleast_1d(y_offset)
        y_test = y_test[:, np.newaxis]

    if normalize:
        nonzeros = np.flatnonzero(X_scale)
        coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]

    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)
    if sparse.issparse(X_test):
        n_order, n_features, n_alphas = coefs.shape
        # Work around for sparse matrices since coefs is a 3-D numpy array.
        coefs_feature_major = np.rollaxis(coefs, 1)
        feature_2d = np.reshape(coefs_feature_major, (n_features, -1))
        X_test_coefs = safe_sparse_dot(X_test, feature_2d)
        X_test_coefs = X_test_coefs.reshape(X_test.shape[0], n_order, -1)
    else:
        X_test_coefs = safe_sparse_dot(X_test, coefs)
    residues = X_test_coefs - y_test[:, :, np.newaxis]
    residues += intercepts
    this_mses = ((residues ** 2).mean(axis=0)).mean(axis=0)

    return this_mses
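# Sketch of how _path_residuals is typically driven (mirroring sklearn's
# LinearModelCV): one call per CV fold, then average the per-alpha MSEs. This
# assumes a path function with a tolerant **params signature, such as the
# enet_path variant defined above, plus its prox_fast dependency; the keys
# 'fit_intercept', 'normalize' and 'precompute' are required because
# _path_residuals reads them from path_params.
import numpy as np
from sklearn.model_selection import KFold

rng = np.random.RandomState(0)
X = rng.randn(60, 10)
y = X[:, 0] + 0.1 * rng.randn(60)

path_params = {'fit_intercept': True, 'normalize': False,
               'precompute': False, 'l1_ratio': 0.5}
alphas = np.logspace(-3, 0, 20)

mses = [_path_residuals(X, y, train, test, enet_path, path_params,
                        alphas=alphas, l1_ratio=0.5, X_order='F',
                        dtype=np.float64)
        for train, test in KFold(n_splits=3).split(X)]

mean_mse = np.mean(mses, axis=0)  # shape (n_alphas,), one MSE per alpha
print('best alpha:', np.sort(alphas)[::-1][np.argmin(mean_mse)])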
def fit(self, X, y, check_input=True):
    if check_input:
        X, y = check_X_y(X, y, accept_sparse='csc',
                         order='F', dtype=[np.float64, np.float32],
                         copy=self.copy_X and self.fit_intercept,
                         multi_output=True, y_numeric=True)
        y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                        ensure_2d=False)

    X, y, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X, y, None, self.precompute, self.normalize,
                 self.fit_intercept, copy=False)

    # # Centering Data
    # if self.fit_intercept:
    #     X, Xmean = center(X, return_mean=True)
    #     y, ymean = center(y, return_mean=True)

    if y.ndim == 1:
        y = y[:, np.newaxis]
    if Xy is not None and Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]

    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    if self.selection not in ['cyclic', 'random']:
        raise ValueError("selection should be either random or cyclic.")

    if not self.warm_start or self.coef_ is None:
        coef_ = np.zeros((n_targets, n_features), dtype=X.dtype,
                         order='F')
    else:
        coef_ = self.coef_
        if coef_.ndim == 1:
            coef_ = coef_[np.newaxis, :]

    dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)
    self.n_iter_ = []

    for k in xrange(n_targets):
        if Xy is not None:
            this_Xy = Xy[:, k]
        else:
            this_Xy = None
        _, this_coef, this_dual_gap, this_iter = enet_admm_path(
            X, y[:, k], rho=self.rho, alpha=self.alpha,
            max_iter=self.max_iter, return_n_iter=True,
            abs_tol=self.abs_tol, rel_tol=self.rel_tol,
            tau=self.tau, mu=self.mu, alphas=[self.mu])
        coef_[k] = this_coef[:, 0]
        dual_gaps_[k] = this_dual_gap[0]
        self.n_iter_.append(this_iter[0])

    # # Fitting the intercept if required
    # if self.fit_intercept:
    #     self._intercept = ymean - np.dot(Xmean, self.coef_)
    # else:
    #     self._intercept = 0.0

    if n_targets == 1:
        self.n_iter_ = self.n_iter_[0]

    self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
    self._set_intercept(X_offset, y_offset, X_scale)
    self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
    return self