def _get_alphas(self, x, y, l2_alpha):
    """Gets the Lasso alpha grid.

    Parameters
    ----------
    x : `numpy.array`
        The design matrix.
    y : `numpy.array`
        The response vector.
    l2_alpha : `float`
        The L2 norm regularization parameter.

    Returns
    -------
    l1_alpha : `list` [`float`] or None
        The alphas for L1 norm regularization to search from.
    """
    lasso_input = self._get_lasso_input(x, y, l2_alpha)
    if lasso_input["x_lasso"].shape[1] > 0:
        l1_alpha = _alpha_grid(
            lasso_input["x_lasso"],
            lasso_input["y_lasso"],
            n_alphas=self.n_l1_alphas)
    else:
        l1_alpha = None
    return l1_alpha
def score_lasso(X, y, rules: List[str], alphas=None, cv=3,
                prediction_task='regression', max_rules=2000,
                random_state=None) -> Tuple[List[Rule], List[float], float]:
    if alphas is None:
        if prediction_task == 'regression':
            alphas = _alpha_grid(X, y)
        elif prediction_task == 'classification':
            alphas = [1 / alpha for alpha in np.logspace(-4, 4, num=10, base=10)]

    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    mse_cv_scores = []
    nonzero_rule_coefs_count = []
    kf = KFold(cv)

    # alphas are sorted from most reg. to least reg.
    for alpha in alphas:
        if prediction_task == 'regression':
            m = Lasso(alpha=alpha, random_state=random_state)
        else:
            m = LogisticRegression(penalty='l1', C=1 / alpha, solver='liblinear')
        mse_cv = 0
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            m.fit(X_train, y_train)
            mse_cv += np.mean((m.predict(X_test) - y_test) ** 2)
        m.fit(X, y)
        rule_count = np.sum(np.abs(m.coef_.flatten()) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)
        mse_cv_scores.append(mse_cv / cv)

    best_alpha = alphas[np.argmin(mse_cv_scores)]
    if prediction_task == 'regression':
        lscv = Lasso(alpha=best_alpha, random_state=random_state, max_iter=2000)
    else:
        lscv = LogisticRegression(penalty='l1', C=1 / best_alpha, solver='liblinear',
                                  random_state=random_state, max_iter=200)
    lscv.fit(X, y)

    coef_ = lscv.coef_.flatten()
    coefs = list(coef_[:-len(rules)])
    support = np.sum(X[:, -len(rules):], axis=0) / X.shape[0]
    nonzero_rules = []
    for r, w, s in zip(rules, coef_[-len(rules):], support):
        if abs(w) > coef_zero_threshold:
            nonzero_rules.append(Rule(r, args=[w], support=s))
            coefs.append(w)
    return nonzero_rules, coefs, lscv.intercept_
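# A hedged usage sketch for score_lasso above (not part of the original source):
# numpy is assumed to be importable as np, the rule strings are placeholders, and
# the trailing columns of X_demo stand in for binarized rule features appended to
# the raw features, which matches the support/coefficient slicing in the function.
rng = np.random.default_rng(0)
X_raw = rng.normal(size=(120, 4))
rule_features = (X_raw[:, :2] > 0).astype(float)   # two toy rule indicator columns
X_demo = np.hstack([X_raw, rule_features])
y_demo = 2.0 * rule_features[:, 0] + X_raw[:, 1] + 0.1 * rng.normal(size=120)

rules_demo = ["feature_0 > 0", "feature_1 > 0"]
kept_rules, all_coefs, intercept = score_lasso(X_demo, y_demo, rules_demo,
                                               prediction_task='regression',
                                               random_state=0)
print(kept_rules, intercept)   # rules whose Lasso weight survived the zero threshold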
def find_alpha_range(X, y, n_alphas=1000):
    from sklearn.linear_model._coordinate_descent import _alpha_grid
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA

    X_transform = PCA().fit_transform(VarianceThreshold().fit_transform(X))
    alphas = _alpha_grid(X=X_transform, y=y, n_alphas=n_alphas)
    return alphas
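# A minimal usage sketch for find_alpha_range (not part of the original source):
# it assumes numpy is importable as np and just checks that _alpha_grid returns
# the grid ordered from the strongest to the weakest regularization.
rng = np.random.default_rng(0)
X_grid_demo = rng.normal(size=(200, 10))
y_grid_demo = X_grid_demo[:, 0] - 2.0 * X_grid_demo[:, 1] + 0.1 * rng.normal(size=200)

alpha_grid_demo = find_alpha_range(X_grid_demo, y_grid_demo, n_alphas=20)
print(alpha_grid_demo[0], alpha_grid_demo[-1])  # largest alpha first, smallest last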
def get_best_alpha_under_max_rules(X, y, rules: List[str], penalty='l1',
                                   prediction_task='regression',
                                   max_rules=30, random_state=None) -> float:
    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    alpha_scores = []
    nonzero_rule_coefs_count = []
    if prediction_task == 'regression':
        alphas = _alpha_grid(X, y)
    elif prediction_task == 'classification':
        alphas = [1 / alpha for alpha in np.logspace(-4, 4, num=10, base=10)]

    # alphas are sorted from most reg. to least reg.
    for alpha in alphas:
        if prediction_task == 'regression':
            m = Lasso(alpha=alpha, random_state=random_state)
            fold_scores = cross_val_score(m, X, y, cv=4, scoring='neg_mean_squared_error')
            alpha_scores.append(np.mean(fold_scores))
        else:
            m = LogisticRegression(penalty=penalty, C=1 / alpha, solver='liblinear',
                                   random_state=random_state)
            fold_scores = cross_val_score(m, X, y, cv=4, scoring='accuracy')
            alpha_scores.append(np.mean(fold_scores))
        m.fit(X, y)
        # count non-zero coefficients in the first (n_features - n_rules) columns
        rule_coefs = m.coef_.flatten()[:X.shape[1] - len(rules)]
        rule_count = np.sum(np.abs(rule_coefs) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)

    # rare case in which different alphas lead to identical scores
    if np.all(np.asarray(alpha_scores) == alpha_scores[0]):
        best_alpha = alphas[len(alpha_scores) - 1]
    else:
        best_alpha = alphas[np.argmax(alpha_scores)]
    return best_alpha
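# A hedged usage sketch for get_best_alpha_under_max_rules (not part of the
# original source). The data is synthetic, the rule strings are placeholders, and
# it is assumed here that the rule indicator columns sit at the end of X_aug; only
# the call signature and return value are being illustrated.
rng = np.random.default_rng(1)
X_base = rng.normal(size=(150, 5))
rule_cols = (X_base[:, :2] > 0).astype(float)       # two toy "rule" indicator columns
X_aug = np.hstack([X_base, rule_cols])
y_aug = X_base[:, 0] + rule_cols[:, 0] + 0.1 * rng.normal(size=150)
toy_rules = ["X_0 > 0", "X_1 > 0"]

best_alpha = get_best_alpha_under_max_rules(X_aug, y_aug, toy_rules,
                                            prediction_task='regression',
                                            max_rules=5, random_state=0)
print(best_alpha)   # best-scoring alpha among those tried before the rule budget was exceeded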
def score_lasso(X, y, rules: List[str], alphas=None, cv=3, max_rules=2000,
                random_state=None) -> Tuple[List[Rule], List[float], float]:
    if alphas is None:
        alphas = _alpha_grid(X, y)

    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    mse_cv_scores = []
    nonzero_rule_coefs_count = []
    kf = KFold(cv)
    for alpha in alphas:  # alphas are sorted from largest to smallest
        m = Lasso(alpha=alpha, random_state=random_state)
        mse_cv = 0
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            m.fit(X_train, y_train)
            mse_cv += np.mean((m.predict(X_test) - y_test) ** 2)
        m.fit(X, y)
        rule_count = sum(np.abs(m.coef_) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)
        mse_cv_scores.append(mse_cv / cv)

    best_alpha = alphas[np.argmin(mse_cv_scores)]
    lscv = Lasso(alpha=best_alpha, random_state=random_state, max_iter=2000)
    lscv.fit(X, y)

    coefs = list(lscv.coef_[:-len(rules)])
    support = np.sum(X[:, -len(rules):], axis=0) / X.shape[0]
    nonzero_rules = []
    for r, w, s in zip(rules, lscv.coef_[-len(rules):], support):
        if abs(w) > coef_zero_threshold:
            nonzero_rules.append(Rule(r, args=[w], support=s))
            coefs.append(w)
    return nonzero_rules, coefs, lscv.intercept_
def admm_path(X, y, Xy=None, alphas=None, eps=1e-3, n_alphas=100, rho=1.0,
              max_iter=1000, tol=1e-04):
    _, n_features = X.shape
    multi_output = False
    n_iters = []

    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    if alphas is None:
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=1.0, eps=eps,
                             n_alphas=n_alphas)
    else:
        alphas = np.sort(alphas)[::-1]
        n_alphas = len(alphas)

    if not multi_output:
        coefs = np.zeros((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.zeros((n_features, n_outputs, n_alphas), dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        clf = LassoADMM(alpha=alpha, rho=rho, max_iter=max_iter, tol=tol)
        clf.fit(X, y)
        coefs[..., i] = clf.coef_
        n_iters.append(clf.n_iter_)

    return alphas, coefs, n_iters
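# A hedged usage sketch for admm_path (not part of the original source): it assumes
# numpy (as np) plus the LassoADMM estimator and _alpha_grid used above are importable
# in this module, and simply traces how the coefficients evolve along the alpha grid.
rng = np.random.default_rng(0)
X_path = rng.normal(size=(80, 6))
true_coef = np.array([1.5, 0.0, -2.0, 0.0, 0.0, 0.5])
y_path = X_path @ true_coef + 0.1 * rng.normal(size=80)

alphas_out, coefs_out, n_iters_out = admm_path(X_path, y_path, n_alphas=20)
print(coefs_out.shape)   # (n_features, n_alphas): one coefficient vector per alpha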
def fit(self, X, y):
    """Fit linear model with coordinate descent.

    Fit is on grid of alphas and best alpha estimated by cross-validation.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training data. Pass directly as Fortran-contiguous data
        to avoid unnecessary memory duplication. If y is mono-output,
        X can be sparse.

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values.
    """
    # This makes sure that there is no duplication in memory.
    # Dealing right with copy_X is important in the following:
    # Multiple functions touch X and subsamples of X and can induce a
    # lot of duplication of memory
    copy_X = self.copy_X and self.fit_intercept

    check_y_params = dict(copy=False, dtype=[np.float64, np.float32],
                          ensure_2d=False)
    if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
        # Keep a reference to X
        reference_to_old_X = X
        # Let us not impose fortran ordering so far: it is
        # not useful for the cross-validation loop and will be done
        # by the model fitting itself

        # Need to validate separately here.
        # We can't pass multi_output=True because that would allow y to be
        # csr. We also want to allow y to be 64 or 32 but check_X_y only
        # allows to convert for 64.
        check_X_params = dict(accept_sparse='csc',
                              dtype=[np.float64, np.float32], copy=False)
        X, y = self._validate_data(X, y,
                                   validate_separately=(check_X_params,
                                                        check_y_params))
        if sparse.isspmatrix(X):
            if (hasattr(reference_to_old_X, "data") and
                    not np.may_share_memory(reference_to_old_X.data, X.data)):
                # X is a sparse matrix and has been copied
                copy_X = False
        elif not np.may_share_memory(reference_to_old_X, X):
            # X has been copied
            copy_X = False
        del reference_to_old_X
    else:
        # Need to validate separately here.
        # We can't pass multi_output=True because that would allow y to be
        # csr. We also want to allow y to be 64 or 32 but check_X_y only
        # allows to convert for 64.
        check_X_params = dict(accept_sparse='csc',
                              dtype=[np.float64, np.float32], order='F',
                              copy=copy_X)
        X, y = self._validate_data(X, y,
                                   validate_separately=(check_X_params,
                                                        check_y_params))
        copy_X = False

    if y.shape[0] == 0:
        raise ValueError("y has 0 samples: %r" % y)

    if not self._is_multitask():
        if y.ndim > 1 and y.shape[1] > 1:
            raise ValueError("For multi-task outputs, use "
                             "MultiTask%s" % self.__class__.__name__)
        y = column_or_1d(y, warn=True)
    else:
        if sparse.isspmatrix(X):
            raise TypeError("X should be dense but a sparse matrix was "
                            "passed")
        elif y.ndim == 1:
            raise ValueError("For mono-task outputs, use "
                             "%sCV" % self.__class__.__name__[9:])

    model = self._get_estimator()

    if self.selection not in ["random", "cyclic"]:
        raise ValueError("selection should be either random or cyclic.")

    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have inconsistent dimensions (%d != %d)"
                         % (X.shape[0], y.shape[0]))

    # All LinearModelCV parameters except 'cv' are acceptable
    path_params = self.get_params()
    if 'l1_ratio' in path_params:
        l1_ratios = np.atleast_1d(path_params['l1_ratio'])
        # For the first path, we need to set l1_ratio
        path_params['l1_ratio'] = l1_ratios[0]
    else:
        l1_ratios = [1, ]
    path_params.pop('cv', None)
    path_params.pop('n_jobs', None)

    alphas = self.alphas
    n_l1_ratio = len(l1_ratios)
    if alphas is None:
        alphas = [_alpha_grid(X, y, l1_ratio=l1_ratio,
                              fit_intercept=self.fit_intercept,
                              eps=self.eps, n_alphas=self.n_alphas,
                              normalize=self.normalize, copy_X=self.copy_X)
                  for l1_ratio in l1_ratios]
    else:
        # Making sure alphas is properly ordered.
        alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))
    # We want n_alphas to be the number of alphas used for each l1_ratio.
    n_alphas = len(alphas[0])
    path_params.update({'n_alphas': n_alphas})

    path_params['copy_X'] = copy_X
    # We are not computing in parallel, we can modify X
    # inplace in the folds
    if effective_n_jobs(self.n_jobs) > 1:
        path_params['copy_X'] = False

    # init cross-validation generator
    cv = check_cv(self.cv)

    # Compute path for all folds and compute MSE to get the best alpha
    folds = list(cv.split(X, y))
    best_mse = np.inf

    # We do a double for loop folded in one, in order to be able to
    # iterate in parallel on l1_ratio and folds
    jobs = (delayed(_path_residuals)(X, y, train, test, self.path,
                                     path_params, alphas=this_alphas,
                                     l1_ratio=this_l1_ratio, X_order='F',
                                     dtype=X.dtype.type)
            for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)
            for train, test in folds)
    mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                         **_joblib_parallel_args(prefer="threads"))(jobs)
    mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))
    mean_mse = np.mean(mse_paths, axis=1)
    self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1))
    for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):
        i_best_alpha = np.argmin(mse_alphas)
        this_best_mse = mse_alphas[i_best_alpha]
        if this_best_mse < best_mse:
            best_alpha = l1_alphas[i_best_alpha]
            best_l1_ratio = l1_ratio
            best_mse = this_best_mse

    self.l1_ratio_ = best_l1_ratio
    self.alpha_ = best_alpha
    if self.alphas is None:
        self.alphas_ = np.asarray(alphas)
        if n_l1_ratio == 1:
            self.alphas_ = self.alphas_[0]
    # Remove duplicate alphas in case alphas is provided.
    else:
        self.alphas_ = np.asarray(alphas[0])

    # Refit the model with the parameters selected
    common_params = {name: value
                     for name, value in self.get_params().items()
                     if name in model.get_params()}
    model.set_params(**common_params)
    model.alpha = best_alpha
    model.l1_ratio = best_l1_ratio
    model.copy_X = copy_X
    precompute = getattr(self, "precompute", None)
    if isinstance(precompute, str) and precompute == "auto":
        model.precompute = False
    model.fit(X, y)
    if not hasattr(self, 'l1_ratio'):
        del self.l1_ratio_
    self.coef_ = model.coef_
    self.intercept_ = model.intercept_
    self.dual_gap_ = model.dual_gap_
    self.n_iter_ = model.n_iter_
    return self
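# A hedged usage sketch (not part of the original source): the fit method above is
# the cross-validation driver behind scikit-learn's LassoCV / ElasticNetCV-style
# estimators, so the equivalent public call looks like this.
import numpy as np
from sklearn.linear_model import LassoCV

rng = np.random.default_rng(0)
X_cv = rng.normal(size=(100, 8))
y_cv = X_cv[:, 0] - X_cv[:, 3] + 0.1 * rng.normal(size=100)

reg = LassoCV(n_alphas=50, cv=5).fit(X_cv, y_cv)
print(reg.alpha_)            # best alpha selected on the internally generated alpha grid
print(reg.mse_path_.shape)   # (n_alphas, n_folds) mean squared error path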