import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LarsCV


def larscv():
    # Synthetic regression problem: 200 samples, 10 features, Gaussian noise
    X, y = make_regression(n_samples=200, n_features=10, noise=4.0, random_state=0)

    # Fit LARS with 2-fold cross-validation and report the in-sample R^2
    reg = LarsCV(cv=2).fit(X, y)
    print(reg.score(X, y))
    print(X[:, 0].shape, y.shape)

    # Plot the first feature against the target
    plt.plot(X[:, 0], y)
    plt.scatter(X[:, 0], y)
    plt.show()
print(X.columns)


################## Use LarsCV for hyperparameter optimization (wrapper)
# LARS starts with all coefficients at zero and picks the feature most correlated with the
# target. It increases that feature's coefficient until the residual is as correlated with
# some other feature as with the current one, then adds that feature and moves in the joint
# (equiangular) least-squares direction of the selected features, iterating.
# This is a feature selection method because at the end some coefficients are exactly 0.

# Instantiate (5-fold CV over the LARS path; the normalize= argument was removed in newer
# scikit-learn releases, so scale the features beforehand if needed)
lars_mod = LarsCV(cv=5)

# Fit
feat_selector = lars_mod.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_mod.score(X, y))
print(lars_mod.alpha_)


################# Using a RandomForestRegressor for feature selection (Tree-based methods)
# How Breiman's (permutation) feature importance is calculated: grow the trees, then take one
# feature, permute it randomly (shuffle), and rerun the observations through the trees. The
# increase in error (misclassification rate for classification, squared error for regression)
# gives the feature importance.
# https://link.springer.com/article/10.1023/A:1010933404324

# Instantiate
rf_mod = RandomForestRegressor(max_depth=2, random_state=123, n_estimators=100, oob_score=True)

# Fit
rf_mod.fit(X, y)
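
# The fitted forest stores impurity-based (MDI) importances in feature_importances_; the
# shuffle-based procedure described above is available separately as
# sklearn.inspection.permutation_importance. A minimal sketch of both, reusing rf_mod,
# X and y from above (n_repeats=10 is an arbitrary illustrative choice):
from sklearn.inspection import permutation_importance

print(rf_mod.feature_importances_)   # impurity-based importances, one value per feature
print(rf_mod.oob_score_)             # out-of-bag R^2, available because oob_score=True

perm = permutation_importance(rf_mod, X, y, n_repeats=10, random_state=123)
print(perm.importances_mean)         # mean drop in R^2 when each feature is shuffled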
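
# Returning to the LarsCV fit above: a quick check of the sparsity that makes LARS usable
# for feature selection. Features the path never brought in keep a coefficient of exactly
# zero (a minimal sketch reusing lars_mod from above; with strongly informative data every
# feature may end up selected):
import numpy as np

print(lars_mod.coef_)                      # fitted coefficients at the CV-chosen alpha
selected = np.flatnonzero(lars_mod.coef_)  # indices of features with nonzero coefficients
print("Features kept by LARS:", selected)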