Example #1
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LarsCV

# Synthetic regression data shared by the examples below
X, y = make_regression(n_samples=200, n_features=10, noise=4.0, random_state=0)

def larscv():
    # Fit LARS with 2-fold cross-validation and report the in-sample R^2
    reg = LarsCV(cv=2).fit(X, y)
    print(reg.score(X, y))
    print(X[:, 0].shape, y.shape)
    # Plot the first feature against the target
    plt.scatter(X[:, 0], y)
    plt.show()

print(X.shape)  # X is a plain ndarray from make_regression, so it has no .columns attribute

################## Use LarsCV for hyperparameter optimization (wrapper)
# LARS (Least Angle Regression) starts with all coefficients at zero and picks the feature most
# correlated with the target, increasing its coefficient until the residual is as correlated with
# some other feature as with the current one. That feature is then added to the active set, and the
# coefficients move together in the joint least-squares direction of the active features; this
# repeats until every feature has entered or a stopping point is reached.
# It doubles as a feature selection method, because some coefficients can end up exactly 0.
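# A minimal sketch of the stepwise behaviour described above, using sklearn.linear_model.lars_path
# on the same X, y as the rest of this script (this block is illustrative, not part of the example):
from sklearn.linear_model import lars_path

# With method='lar' variables are only ever added, never dropped, so `active`
# lists the features in the order they entered the model along the path.
alphas, active, coef_path = lars_path(X, y, method='lar')
print("Order in which features entered:", active)
print("Coefficient path shape (n_features, n_steps):", coef_path.shape)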

# Instantiate
lars_mod = LarsCV(cv=5)  # note: the old normalize=False argument was removed in scikit-learn 1.2

# Fit
feat_selector = lars_mod.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_mod.score(X, y))
print(lars_mod.alpha_)
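
# To see the feature-selection effect mentioned above, inspect the fitted coefficients.
# This is just an illustrative sketch; on this synthetic dataset every feature is informative,
# so the set of zero coefficients may well be empty.
import numpy as np

print(lars_mod.coef_)
print("Features with zero coefficients:", np.flatnonzero(lars_mod.coef_ == 0))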

################# Using a RandomForestRegressor for feature selection (Tree-based methods)
# Permutation importance (Breiman, 2001): grow the trees, then take one feature, randomly
# permute (shuffle) its values, and rerun the (out-of-bag) observations through the trees.
# The increase in prediction error relative to the unpermuted data (misclassification rate for
# classification, squared error for regression) gives that feature's importance.
# https://link.springer.com/article/10.1023/A:1010933404324

# Instantiate
rf_mod = RandomForestRegressor(max_depth=2, random_state=123,
                               n_estimators=100, oob_score=True)

# Fit
rf_mod.fit(X, y)
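
# Sketch of pulling importances from the fitted forest (illustrative, not part of the original example).
# Note that feature_importances_ is impurity-based (mean decrease in impurity);
# sklearn.inspection.permutation_importance implements the shuffle-and-rescore idea described above.
import numpy as np
from sklearn.inspection import permutation_importance

print(rf_mod.oob_score_)            # out-of-bag R^2
print(rf_mod.feature_importances_)  # impurity-based importances

perm = permutation_importance(rf_mod, X, y, n_repeats=10, random_state=123)
print("Features ranked by permutation importance:",
      np.argsort(perm.importances_mean)[::-1])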