Example #1
    # Method of a model-tree class; assumes numpy as np, sklearn's LassoCV,
    # and instance attributes self.x, self.y, self.EPSILON, self.reg_lambda,
    # self.random_state, and self.get_loss.
    def build_leaf(self, sample_indice):

        # Standardize the samples in this leaf; EPSILON avoids division by zero.
        mx = self.x[sample_indice].mean(0)
        sx = self.x[sample_indice].std(0) + self.EPSILON
        nx = (self.x[sample_indice] - mx) / sx

        # Fit a cross-validated Lasso on the standardized features.
        best_estimator = LassoCV(alphas=self.reg_lambda, cv=5, random_state=self.random_state)
        best_estimator.fit(nx, self.y[sample_indice])

        # Map the model back to the original feature scale so it can predict
        # from unstandardized inputs.
        best_estimator.coef_ = best_estimator.coef_ / sx
        best_estimator.intercept_ = best_estimator.intercept_ - np.dot(mx, best_estimator.coef_.T)

        # Clip leaf predictions to the range seen during training.
        pred = best_estimator.predict(self.x[sample_indice])
        xmin, xmax = pred.min(), pred.max()
        predict_func = lambda x: np.clip(best_estimator.predict(x), xmin, xmax)
        best_impurity = self.get_loss(self.y[sample_indice], pred)
        return predict_func, best_estimator, best_impurity
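
The rescaling above lets the fitted leaf model predict directly from
unstandardized inputs. A minimal standalone sketch (synthetic data; the names
are illustrative, not from the original class) checking that mapping coef_
and intercept_ back to the raw scale reproduces the standardized model's
predictions:

import numpy as np
from sklearn.linear_model import LassoCV

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5)) * np.array([1.0, 10.0, 0.1, 5.0, 2.0])  # mixed scales
y = X @ np.array([1.5, -2.0, 0.0, 0.5, 0.0]) + 0.1 * rng.normal(size=200)

mx, sx = X.mean(0), X.std(0) + 1e-8   # stand-in for self.EPSILON
nx = (X - mx) / sx

est = LassoCV(cv=5).fit(nx, y)
pred_std = est.predict(nx)            # predictions via standardized features

# Undo the standardization on the model itself, as build_leaf does.
est.coef_ = est.coef_ / sx
est.intercept_ = est.intercept_ - np.dot(mx, est.coef_.T)

assert np.allclose(pred_std, est.predict(X))  # raw-scale model agrees
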
import numpy as np
from sklearn.linear_model import Lasso, LassoCV

fit_intercept_default = True  # ASSUMPTION: module-level default, defined elsewhere in the source


def AdaptiveLasso(
    X,
    y,
    CV=True,
    fit_intercept=fit_intercept_default,
    alpha=None,  # required when CV=False
    coef=None,  # the true coefficients, if known
    verbose=False,
    n_lasso_iterations=5,
):
    """
    Example of adaptive Lasso to produce event sparser solutions

    Adaptive lasso consists in computing many Lasso with feature
    reweighting. It's also known as iterated L1.
    
    Help with the implementation: 

    https://gist.github.com/agramfort/1610922

    
    --- Example 1: Using generated data -----
    
    from sklearn.datasets import make_regression
    X, y, coef = make_regression(n_samples=306, n_features=8000, n_informative=50,
                    noise=0.1, shuffle=True, coef=True, random_state=42)

    X /= np.sum(X ** 2, axis=0)  # scale features
    alpha = 0.1
    
    model_al = sklm.AdaptiveLasso(
        X,
        y,
        alpha = alpha,
        coef = coef,
        verbose = True
    )
    
    ---- Example 2: Using simpler data ----
    X,y = pdml.X_y(df_scaled,target_name)
    model_al = sklm.AdaptiveLasso(
        X,
        y,
        verbose = True
    )
    

    """
    if "pandas" in str(type(X)):
        X = X.to_numpy()
    if "pandas" in str(type(y)):
        y = y.to_numpy()

    # Penalty g(w) = sqrt(|w|) and its derivative g'(w); eps guards the
    # division when a coefficient is exactly zero.
    def g(w):
        return np.sqrt(np.abs(w))

    def gprime(w):
        return 1. / (2. * np.sqrt(np.abs(w)) + np.finfo(float).eps)

    # An alternative (log) penalty:
    # ll = 0.01
    # g = lambda w: np.log(ll + np.abs(w))
    # gprime = lambda w: 1. / (ll + np.abs(w))

    n_samples, n_features = X.shape

    # Penalized objective: (1 / 2n) * ||y - Xw||^2 + alpha * sum(g(w)).
    def p_obj(w, alpha):
        return 1. / (2 * n_samples) * np.sum(
            (y - np.dot(X, w))**2) + alpha * np.sum(g(w))

    weights = np.ones(n_features)  # uniform weights on the first pass

    for k in range(n_lasso_iterations):
        # Rescaling columns by 1/weights turns a weighted-L1 problem into a
        # plain Lasso on X_w.
        X_w = X / weights[np.newaxis, :]
        if CV:
            # alpha is selected by cross-validation on each pass.
            clf = LassoCV(fit_intercept=fit_intercept)
        else:
            clf = Lasso(alpha=alpha, fit_intercept=fit_intercept)
        clf.fit(X_w, y)
        curr_alpha = clf.alpha_ if CV else alpha

        coef_ = clf.coef_ / weights   # map back to the original feature scale
        weights = gprime(coef_)       # reweight for the next pass
        if verbose:
            print(p_obj(coef_, curr_alpha))  # should decrease across iterations

    # Store the rescaled coefficients on the fitted estimator so that it
    # predicts directly from the unscaled X.
    clf.coef_ = coef_
    if verbose:
        print(f"Final R^2 score: {clf.score(X, y)}")
    # print(np.mean((clf.coef_ != 0.0) == (coef != 0.0)))  # support recovery vs. true coef

    return clf
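
The column rescaling inside the loop is what makes each pass a weighted Lasso:
solving a plain Lasso on X / weights and dividing the coefficients by weights
is equivalent to penalizing sum(weights_j * |w_j|). A small numeric check of
that identity (a sketch with synthetic data, not part of the original module):

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 10))
y = X[:, 0] - 2 * X[:, 1] + 0.1 * rng.normal(size=100)
w = rng.uniform(0.5, 2.0, size=10)   # arbitrary positive weights

beta = Lasso(alpha=0.1, fit_intercept=False).fit(X / w, y).coef_
c = beta / w   # coefficients mapped back, as in the loop

assert np.allclose((X / w) @ beta, X @ c)                      # same predictions
assert np.isclose(np.abs(beta).sum(), np.sum(w * np.abs(c)))   # ||beta||_1 = sum w_j|c_j|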
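
A hedged end-to-end run, mirroring the docstring's Example 1 but with a
reduced feature count so it executes quickly (that reduction is an adjustment,
not from the original):

import numpy as np
from sklearn.datasets import make_regression

X, y, coef = make_regression(n_samples=306, n_features=1000, n_informative=50,
                             noise=0.1, shuffle=True, coef=True, random_state=42)
X /= np.sum(X ** 2, axis=0)  # scale features

clf = AdaptiveLasso(X, y, CV=True, verbose=True)  # printed p_obj should decrease
print("support recovery:", np.mean((clf.coef_ != 0.0) == (coef != 0.0)))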