def _get_alphas(self, x, y, l2_alpha):
        """Gets the Lasso alpha grid.

        Parameters
        ----------
        x : `numpy.array`
            The design matrix.
        y : `numpy.array`
            The response vector.
        l2_alpha : `float`
            The L2 norm regularization parameter.

        Returns
        -------
        l1_alpha : `numpy.ndarray` or None
            The alphas for L1 norm regularization to search over, or None
            when the Lasso design matrix has no columns.
        """
        lasso_input = self._get_lasso_input(x, y, l2_alpha)
        if lasso_input["x_lasso"].shape[1] > 0:
            l1_alpha = _alpha_grid(lasso_input["x_lasso"],
                                   lasso_input["y_lasso"],
                                   n_alphas=self.n_l1_alphas)
        else:
            l1_alpha = None
        return l1_alpha
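
The helper above delegates to scikit-learn's private `_alpha_grid`, which returns a geometric grid of `n_alphas` values ordered from the strongest regularization down to the weakest. A minimal standalone sketch of that call on illustrative data (the `_get_lasso_input` plumbing is omitted):

import numpy as np
from sklearn.linear_model._coordinate_descent import _alpha_grid

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))                       # toy design matrix
y = X @ rng.normal(size=5) + rng.normal(size=100)   # toy response

alphas = _alpha_grid(X, y, n_alphas=10)
print(alphas)  # descending; alphas[0] is the smallest alpha that zeroes every coefficient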
Example #2
def score_lasso(X, y, rules: List[str], alphas=None, cv=3,
                prediction_task='regression',
                max_rules=2000, random_state=None) -> Tuple[List[Rule], List[float], float]:
    if alphas is None:
        if prediction_task == 'regression':
            alphas = _alpha_grid(X, y)
        elif prediction_task == 'classification':
            alphas = [1 / alpha
                      for alpha in np.logspace(-4, 4, num=10, base=10)]

    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    mse_cv_scores = []
    nonzero_rule_coefs_count = []
    kf = KFold(cv)
    
    # alphas are sorted from the strongest regularization to the weakest
    for alpha in alphas: 
        
        if prediction_task == 'regression':
            m = Lasso(alpha=alpha, random_state=random_state)
        else:
            m = LogisticRegression(penalty='l1', C=1/alpha, solver='liblinear',
                                   random_state=random_state)
        mse_cv = 0
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            m.fit(X_train, y_train)
            mse_cv += np.mean((m.predict(X_test) - y_test) ** 2)
        
        m.fit(X, y)
        
        rule_count = np.sum(np.abs(m.coef_.flatten()) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)
        mse_cv_scores.append(mse_cv / cv)
    
    best_alpha = alphas[np.argmin(mse_cv_scores)]
    if prediction_task == 'regression':
        lscv = Lasso(alpha=best_alpha, random_state=random_state, max_iter=2000)
    else:
        lscv = LogisticRegression(penalty='l1', C=1/best_alpha, solver='liblinear',
                                  random_state=random_state, max_iter=200)
    lscv.fit(X, y)

    coef_ = lscv.coef_.flatten()
    coefs = list(coef_[:-len(rules)])  # coefficients of the plain (non-rule) features
    support = np.sum(X[:, -len(rules):], axis=0) / X.shape[0]  # fraction of samples each rule fires on

    nonzero_rules = []
    for r, w, s in zip(rules, coef_[-len(rules):], support):
        if abs(w) > coef_zero_threshold:
            nonzero_rules.append(Rule(r, args=[w], support=s))
            coefs.append(w)
    
    return nonzero_rules, coefs, lscv.intercept_
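
A hedged usage sketch for the function above. The `Rule` dataclass here is a hypothetical stand-in for the project's own Rule class, and the imports the function body relies on (numpy, Lasso, KFold, LogisticRegression, _alpha_grid) are assumed to be in scope:

import numpy as np
from dataclasses import dataclass, field
from typing import List

@dataclass
class Rule:  # hypothetical stand-in for the project's Rule class
    rule: str
    args: List[float] = field(default_factory=list)
    support: float = 0.0

rng = np.random.default_rng(0)
X_linear = rng.normal(size=(200, 3))
X_rules = (rng.random((200, 2)) > 0.5).astype(float)  # binary rule features go last
X = np.hstack([X_linear, X_rules])
y = X @ np.array([1.0, 0.0, -0.5, 2.0, 0.0]) + 0.1 * rng.normal(size=200)

nonzero_rules, coefs, intercept = score_lasso(
    X, y, rules=["f0 > 0", "f1 <= 0.5"], random_state=0)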
Example #3
def find_alpha_range(X, y, n_alphas=1000):
    from sklearn.linear_model._coordinate_descent import _alpha_grid
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA

    # Drop zero-variance features, then decorrelate with PCA before
    # computing the alpha grid on the transformed design matrix.
    X_transform = PCA().fit_transform(VarianceThreshold().fit_transform(X))
    alphas = _alpha_grid(X=X_transform, y=y, n_alphas=n_alphas)

    return alphas
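
A minimal usage sketch on synthetic data; the function imports its own scikit-learn dependencies, so only numpy is needed at the call site:

import numpy as np

X = np.random.randn(50, 10)
y = np.random.randn(50)
alphas = find_alpha_range(X, y, n_alphas=100)
print(alphas[0], alphas[-1])  # largest (strongest regularization) down to smallest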
Example #4
def get_best_alpha_under_max_rules(X,
                                   y,
                                   rules: List[str],
                                   penalty='l1',
                                   prediction_task='regression',
                                   max_rules=30,
                                   random_state=None) -> float:
    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    alpha_scores = []
    nonzero_rule_coefs_count = []

    if prediction_task == 'regression':
        alphas = _alpha_grid(X, y)
    elif prediction_task == 'classification':
        alphas = [1 / alpha for alpha in np.logspace(-4, 4, num=10, base=10)]

    # alphas are sorted from the strongest regularization to the weakest
    for alpha in alphas:

        if prediction_task == 'regression':
            m = Lasso(alpha=alpha, random_state=random_state)
            fold_scores = cross_val_score(m,
                                          X,
                                          y,
                                          cv=4,
                                          scoring='neg_mean_squared_error')
            alpha_scores.append(np.mean(fold_scores))
        else:
            m = LogisticRegression(penalty=penalty,
                                   C=1 / alpha,
                                   solver='liblinear',
                                   random_state=random_state)
            fold_scores = cross_val_score(m, X, y, cv=4, scoring='accuracy')
            alpha_scores.append(np.mean(fold_scores))

        m.fit(X, y)

        rule_coefs = m.coef_.flatten()[:X.shape[1] - len(rules)]
        rule_count = np.sum(np.abs(rule_coefs) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)

    # rare case in which different alphas lead to identical scores;
    # cast to an array so the elementwise comparison actually happens
    if np.all(np.asarray(alpha_scores) == alpha_scores[0]):
        best_alpha = alphas[len(alpha_scores) - 1]
    else:
        best_alpha = alphas[np.argmax(alpha_scores)]

    return best_alpha
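
A hedged usage sketch for the regression path of the function above, again assuming numpy, Lasso, LogisticRegression, cross_val_score and _alpha_grid are already imported:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(120, 6))
y = X @ np.array([2.0, 0.0, 0.0, -1.0, 0.0, 0.0]) + 0.1 * rng.normal(size=120)

best_alpha = get_best_alpha_under_max_rules(
    X, y, rules=["f0 > 0"], max_rules=5, random_state=0)
print(best_alpha)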
Example #5
def score_lasso(X,
                y,
                rules: List[str],
                alphas=None,
                cv=3,
                max_rules=2000,
                random_state=None) -> Tuple[List[Rule], List[float], float]:
    if alphas is None:
        alphas = _alpha_grid(X, y)

    coef_zero_threshold = 1e-6 / np.mean(np.abs(y))
    mse_cv_scores = []
    nonzero_rule_coefs_count = []
    kf = KFold(cv)
    for alpha in alphas:  # alphas are sorted from largest to smallest
        m = Lasso(alpha=alpha, random_state=random_state)
        mse_cv = 0
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            m.fit(X_train, y_train)
            mse_cv += np.mean((m.predict(X_test) - y_test)**2)

        m.fit(X, y)

        rule_count = sum(np.abs(m.coef_) > coef_zero_threshold)
        if rule_count > max_rules:
            break
        nonzero_rule_coefs_count.append(rule_count)
        mse_cv_scores.append(mse_cv / cv)

    best_alpha = alphas[np.argmin(mse_cv_scores)]
    lscv = Lasso(alpha=best_alpha, random_state=random_state, max_iter=2000)
    lscv.fit(X, y)

    coefs = list(lscv.coef_[:-len(rules)])  # coefficients of the plain (non-rule) features
    support = np.sum(X[:, -len(rules):], axis=0) / X.shape[0]  # fraction of samples each rule fires on

    nonzero_rules = []
    for r, w, s in zip(rules, lscv.coef_[-len(rules):], support):
        if abs(w) > coef_zero_threshold:
            nonzero_rules.append(Rule(r, args=[w], support=s))
            coefs.append(w)

    return nonzero_rules, coefs, lscv.intercept_
Example #6
def admm_path(X,
              y,
              Xy=None,
              alphas=None,
              eps=1e-3,
              n_alphas=100,
              rho=1.0,
              max_iter=1000,
              tol=1e-04):
    """Compute the Lasso path with an ADMM solver over a grid of alphas."""
    _, n_features = X.shape
    multi_output = False
    n_iters = []

    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    if alphas is None:
        alphas = _alpha_grid(X,
                             y,
                             Xy=Xy,
                             l1_ratio=1.0,
                             eps=eps,
                             n_alphas=n_alphas)
    else:
        alphas = np.sort(alphas)[::-1]
        n_alphas = len(alphas)

    if not multi_output:
        coefs = np.zeros((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.zeros((n_features, n_outputs, n_alphas), dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        clf = LassoADMM(alpha=alpha, rho=rho, max_iter=max_iter, tol=tol)
        clf.fit(X, y)
        coefs[..., i] = clf.coef_
        n_iters.append(clf.n_iter_)

    return alphas, coefs, n_iters
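
A hedged usage sketch; `LassoADMM` is not part of scikit-learn and is assumed to be defined in the same module as `admm_path`:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(80, 4))
y = X @ np.array([1.5, 0.0, -2.0, 0.0]) + 0.05 * rng.normal(size=80)

alphas, coefs, n_iters = admm_path(X, y, n_alphas=20)
print(coefs.shape)  # (n_features, n_alphas) == (4, 20)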
Example #7
    def fit(self, X, y):
        """Fit linear model with coordinate descent

        Fit is on grid of alphas and best alpha estimated by cross-validation.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data. Pass directly as Fortran-contiguous data
            to avoid unnecessary memory duplication. If y is mono-output,
            X can be sparse.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.
        """
        # This makes sure that there is no duplication in memory.
        # Dealing right with copy_X is important in the following:
        # Multiple functions touch X and subsamples of X and can induce a
        # lot of duplication of memory
        copy_X = self.copy_X and self.fit_intercept

        check_y_params = dict(copy=False,
                              dtype=[np.float64, np.float32],
                              ensure_2d=False)
        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):
            # Keep a reference to X
            reference_to_old_X = X
            # Let us not impose Fortran ordering for now: it is not useful
            # for the cross-validation loop and will be done by the model
            # fitting itself.

            # Need to validate separately here.
            # We can't pass multi_output=True because that would allow y to
            # be csr. We also want to allow y to be 64- or 32-bit, but
            # check_X_y only allows conversion to 64-bit.
            check_X_params = dict(accept_sparse='csc',
                                  dtype=[np.float64, np.float32],
                                  copy=False)
            X, y = self._validate_data(X,
                                       y,
                                       validate_separately=(check_X_params,
                                                            check_y_params))
            if sparse.isspmatrix(X):
                if (hasattr(reference_to_old_X, "data")
                        and not np.may_share_memory(reference_to_old_X.data,
                                                    X.data)):
                    # X is a sparse matrix and has been copied
                    copy_X = False
            elif not np.may_share_memory(reference_to_old_X, X):
                # X has been copied
                copy_X = False
            del reference_to_old_X
        else:
            # Need to validate separately here.
            # We can't pass multi_output=True because that would allow y to
            # be csr. We also want to allow y to be 64- or 32-bit, but
            # check_X_y only allows conversion to 64-bit.
            check_X_params = dict(accept_sparse='csc',
                                  dtype=[np.float64, np.float32],
                                  order='F',
                                  copy=copy_X)
            X, y = self._validate_data(X,
                                       y,
                                       validate_separately=(check_X_params,
                                                            check_y_params))
            copy_X = False

        if y.shape[0] == 0:
            raise ValueError("y has 0 samples: %r" % y)

        if not self._is_multitask():
            if y.ndim > 1 and y.shape[1] > 1:
                raise ValueError("For multi-task outputs, use "
                                 "MultiTask%s" % self.__class__.__name__)
            y = column_or_1d(y, warn=True)
        else:
            if sparse.isspmatrix(X):
                raise TypeError("X should be dense but a sparse matrix was"
                                "passed")
            elif y.ndim == 1:
                raise ValueError("For mono-task outputs, use "
                                 "%sCV" % self.__class__.__name__[9:])

        model = self._get_estimator()

        if self.selection not in ["random", "cyclic"]:
            raise ValueError("selection should be either random or cyclic.")

        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y have inconsistent dimensions (%d != %d)" %
                (X.shape[0], y.shape[0]))

        # All LinearModelCV parameters except 'cv' are acceptable
        path_params = self.get_params()
        if 'l1_ratio' in path_params:
            l1_ratios = np.atleast_1d(path_params['l1_ratio'])
            # For the first path, we need to set l1_ratio
            path_params['l1_ratio'] = l1_ratios[0]
        else:
            l1_ratios = [1]
        path_params.pop('cv', None)
        path_params.pop('n_jobs', None)

        alphas = self.alphas
        n_l1_ratio = len(l1_ratios)
        if alphas is None:
            alphas = [
                _alpha_grid(X,
                            y,
                            l1_ratio=l1_ratio,
                            fit_intercept=self.fit_intercept,
                            eps=self.eps,
                            n_alphas=self.n_alphas,
                            normalize=self.normalize,
                            copy_X=self.copy_X) for l1_ratio in l1_ratios
            ]
        else:
            # Making sure alphas is properly ordered.
            alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))
        # We want n_alphas to be the number of alphas used for each l1_ratio.
        n_alphas = len(alphas[0])
        path_params.update({'n_alphas': n_alphas})

        path_params['copy_X'] = copy_X
        # We are not computing in parallel, we can modify X
        # inplace in the folds
        if effective_n_jobs(self.n_jobs) > 1:
            path_params['copy_X'] = False

        # init cross-validation generator
        cv = check_cv(self.cv)

        # Compute path for all folds and compute MSE to get the best alpha
        folds = list(cv.split(X, y))
        best_mse = np.inf

        # We do a double for loop folded in one, in order to be able to
        # iterate in parallel on l1_ratio and folds
        jobs = (delayed(_path_residuals)(X,
                                         y,
                                         train,
                                         test,
                                         self.path,
                                         path_params,
                                         alphas=this_alphas,
                                         l1_ratio=this_l1_ratio,
                                         X_order='F',
                                         dtype=X.dtype.type)
                for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)
                for train, test in folds)
        mse_paths = Parallel(n_jobs=self.n_jobs,
                             verbose=self.verbose,
                             **_joblib_parallel_args(prefer="threads"))(jobs)
        mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))
        mean_mse = np.mean(mse_paths, axis=1)
        self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1))
        for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas,
                                                   mean_mse):
            i_best_alpha = np.argmin(mse_alphas)
            this_best_mse = mse_alphas[i_best_alpha]
            if this_best_mse < best_mse:
                best_alpha = l1_alphas[i_best_alpha]
                best_l1_ratio = l1_ratio
                best_mse = this_best_mse

        self.l1_ratio_ = best_l1_ratio
        self.alpha_ = best_alpha
        if self.alphas is None:
            self.alphas_ = np.asarray(alphas)
            if n_l1_ratio == 1:
                self.alphas_ = self.alphas_[0]
        # Remove duplicate alphas in case alphas is provided.
        else:
            self.alphas_ = np.asarray(alphas[0])

        # Refit the model with the parameters selected
        common_params = {
            name: value
            for name, value in self.get_params().items()
            if name in model.get_params()
        }
        model.set_params(**common_params)
        model.alpha = best_alpha
        model.l1_ratio = best_l1_ratio
        model.copy_X = copy_X
        precompute = getattr(self, "precompute", None)
        if isinstance(precompute, str) and precompute == "auto":
            model.precompute = False
        model.fit(X, y)
        if not hasattr(self, 'l1_ratio'):
            del self.l1_ratio_
        self.coef_ = model.coef_
        self.intercept_ = model.intercept_
        self.dual_gap_ = model.dual_gap_
        self.n_iter_ = model.n_iter_
        return self
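
The method above is scikit-learn's internal LinearModelCV.fit; the same machinery is reached through the public LassoCV and ElasticNetCV estimators. A minimal sketch:

import numpy as np
from sklearn.linear_model import LassoCV

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 8))
y = X @ np.array([3.0, 0.0, 0.0, -1.5, 0.0, 0.0, 0.0, 0.0]) + 0.1 * rng.normal(size=100)

reg = LassoCV(n_alphas=50, cv=5).fit(X, y)
print(reg.alpha_)           # best alpha chosen on the internal _alpha_grid
print(reg.mse_path_.shape)  # (n_alphas, n_folds) == (50, 5)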