def build_leaf(self, sample_indice):
    # Standardize the features of the samples falling into this leaf.
    mx = self.x[sample_indice].mean(0)
    sx = self.x[sample_indice].std(0) + self.EPSILON
    nx = (self.x[sample_indice] - mx) / sx

    # Fit a cross-validated Lasso on the standardized features.
    best_estimator = LassoCV(alphas=self.reg_lambda, cv=5, random_state=self.random_state)
    best_estimator.fit(nx, self.y[sample_indice])

    # Map the coefficients and intercept back to the original feature scale.
    best_estimator.coef_ = best_estimator.coef_ / sx
    best_estimator.intercept_ = best_estimator.intercept_ - np.dot(mx, best_estimator.coef_.T)

    # Clip predictions to the range of fitted values observed in this leaf.
    xmin = np.min(np.dot(self.x[sample_indice], best_estimator.coef_) + best_estimator.intercept_)
    xmax = np.max(np.dot(self.x[sample_indice], best_estimator.coef_) + best_estimator.intercept_)
    predict_func = lambda x: np.clip(best_estimator.predict(x), xmin, xmax)

    # Leaf impurity: loss of the fitted linear model on the leaf samples.
    best_impurity = self.get_loss(self.y[sample_indice], best_estimator.predict(self.x[sample_indice]))
    return predict_func, best_estimator, best_impurity
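
# --- Usage sketch for build_leaf (illustrative, not part of the original code) ---
# build_leaf assumes it is a method of a class exposing x, y, EPSILON, reg_lambda,
# random_state and get_loss. The host class, attribute defaults and demo data below
# are hypothetical stand-ins so the snippet can run on its own.
import numpy as np
from sklearn.linear_model import LassoCV


class _LinearLeafHost:
    """Hypothetical host class supplying the attributes build_leaf relies on."""
    EPSILON = 1e-7

    def __init__(self, x, y, reg_lambda=(0.001, 0.01, 0.1), random_state=0):
        self.x = np.asarray(x, dtype=float)
        self.y = np.asarray(y, dtype=float)
        self.reg_lambda = list(reg_lambda)   # candidate alphas for LassoCV (assumption)
        self.random_state = random_state

    def get_loss(self, label, pred):
        # mean squared error used as the leaf impurity (an assumption)
        return np.mean((label - pred) ** 2)

    build_leaf = build_leaf  # reuse the module-level function above as a method


def _demo_build_leaf():
    """Tiny synthetic demo (hypothetical data) of the leaf model above."""
    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(200, 5))
    y_demo = X_demo @ np.array([1.5, -2.0, 0.0, 0.0, 0.5]) + 0.1 * rng.normal(size=200)
    host = _LinearLeafHost(X_demo, y_demo)
    predict_func, estimator, impurity = host.build_leaf(np.arange(len(y_demo)))
    print("leaf impurity:", impurity)
    print("leaf coefficients:", estimator.coef_)
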
def AdaptiveLasso(
    X,
    y,
    CV=True,
    fit_intercept=fit_intercept_default,
    alpha=None,
    coef=None,  # the true coefficients, if they are known
    verbose=False,
    n_lasso_iterations=5,
):
    """
    Example of adaptive Lasso to produce even sparser solutions.

    The adaptive lasso consists in computing many Lassos with feature reweighting.
    It is also known as iterated L1.

    Help with the implementation:
    https://gist.github.com/agramfort/1610922

    ---- Example 1: Using generated data ----
    from sklearn.datasets import make_regression
    X, y, coef = make_regression(n_samples=306, n_features=8000, n_informative=50,
                                 noise=0.1, shuffle=True, coef=True, random_state=42)

    X /= np.sum(X ** 2, axis=0)  # scale features
    alpha = 0.1

    model_al = sklm.AdaptiveLasso(
        X,
        y,
        alpha=alpha,
        coef=coef,
        verbose=True,
    )

    ---- Example 2: Using simpler data ----
    X, y = pdml.X_y(df_scaled, target_name)
    model_al = sklm.AdaptiveLasso(
        X,
        y,
        verbose=True,
    )
    """
    if "pandas" in str(type(X)):
        X = X.to_numpy()
    if "pandas" in str(type(y)):
        y = y.to_numpy()

    # g(w): concave penalty applied to the coefficients, sqrt(|w|)
    def g(w):
        return np.sqrt(np.abs(w))

    # gprime(w): derivative of g, i.e. 1 / (2 * sqrt(|w|)), with eps to avoid division by zero
    def gprime(w):
        return 1. / (2. * np.sqrt(np.abs(w)) + np.finfo(float).eps)

    # Or another option:
    # ll = 0.01
    # g = lambda w: np.log(ll + np.abs(w))
    # gprime = lambda w: 1. / (ll + np.abs(w))

    n_samples, n_features = X.shape

    # Penalized objective that the reweighted iterations (approximately) minimize
    def p_obj(w, alpha):
        return 1. / (2 * n_samples) * np.sum((y - np.dot(X, w)) ** 2) + alpha * np.sum(g(w))

    weights = np.ones(n_features)

    for k in range(n_lasso_iterations):
        # Rescale the columns so a plain L1 penalty acts as a per-feature weighted L1 penalty
        X_w = X / weights[np.newaxis, :]

        if CV:
            clf = LassoCV(
                #alpha=alpha,
                fit_intercept=fit_intercept)
        else:
            clf = Lasso(alpha=alpha, fit_intercept=fit_intercept)

        clf.fit(X_w, y)
        curr_alpha = clf.alpha_ if CV else alpha

        # Map the coefficients back to the original feature scale and update the weights
        coef_ = clf.coef_ / weights
        weights = gprime(coef_)

        if verbose:
            print(p_obj(coef_, curr_alpha))  # should go down

    clf.coef_ = coef_

    if verbose:
        X_w = X / weights[np.newaxis, :]
        print(f"Final R^2 score: {clf.score(X, y)}")
        #print(np.mean((clf.coef_ != 0.0) == (coef != 0.0)))

    return clf
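
# --- Sanity-check sketch for the reweighting trick (illustrative, not part of the
# original code): fitting an ordinary Lasso on the column-rescaled design X / weights
# and dividing the coefficients back by weights is a change of variables, so the plain
# L1 penalty on clf.coef_ acts as a per-feature weighted L1 penalty on coef_.
# The data, alpha and weights below are arbitrary assumptions for the demonstration.
def _demo_reweighting_change_of_variables():
    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Lasso

    X_demo, y_demo = make_regression(n_samples=100, n_features=20, n_informative=5,
                                     noise=0.1, random_state=0)
    weights_demo = np.linspace(0.5, 2.0, X_demo.shape[1])  # stand-in for gprime(coef_)

    X_w = X_demo / weights_demo[np.newaxis, :]
    clf = Lasso(alpha=0.1).fit(X_w, y_demo)
    coef_ = clf.coef_ / weights_demo  # map back to the original feature scale

    # Predictions agree, which is exactly the identity the AdaptiveLasso loop exploits.
    assert np.allclose(X_w @ clf.coef_, X_demo @ coef_)
    print("change-of-variables check passed")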