# Bank-loan example: screen the feature columns with a randomized logistic
# regression, then fit a plain logistic regression on the surviving columns
# and report its accuracy on the same data it was trained on.
#
# NOTE(review): `pd`, `RLR` and `LR` are not bound in the visible part of this
# file. The aliases are bound here to the scikit-learn estimators that the
# original comments describe (randomized logistic regression / logistic
# regression) -- confirm this matches the intended classes.
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Parameter initialisation: columns 0-7 are used as features, column 8 as the
# target.
data = pd.read_excel('../../data1/bankloan.xls')
x = data.iloc[:, :8]
x_data = x.values
y = data.iloc[:, 8]
y_data = y.values

# Build the feature-screening model.
rlr = RLR()  # randomized logistic regression, used to select variables
rlr.fit(x, y)  # train the screening model
# Use the public selector API instead of the private `_get_support_mask()`.
result = rlr.get_support()
print(result)  # boolean mask with the feature-screening result
# The per-feature scores are also available through the `scores_` attribute
# (an attribute, not a method).
result = list(result)
print('通过随机逻辑回归模型筛选特征结束。')
print('有效特征为: %s' % ','.join(x.columns[result]))
x_data = x[x.columns[result]].values
print(x_data)

# Build the logistic regression model.
lr = LR()
lr.fit(x_data, y_data)  # train on the screened features only
print('逻辑回归模型结束。')
# The score is computed on the training data itself; the value recorded in the
# original comment was 0.8142857142857143.
print('逻辑回归模型的正确率为: %s' % lr.score(x_data, y_data))
from sklearn import model_selection
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RandomizedLasso
from sklearn.linear_model import RandomizedLogisticRegression

# Demo on synthetic data: compare an L1-penalised logistic regression trained
# on all features against the same classifier trained only on the features
# kept by a randomized logistic regression selector, then print the
# per-feature scores of a randomized lasso on a synthetic regression problem.
#
# NOTE(review): RandomizedLogisticRegression and RandomizedLasso were
# deprecated in scikit-learn 0.19 and removed in 0.21, so this script needs an
# older scikit-learn release -- confirm the pinned version.

X, y = make_classification(n_samples=100, n_features=100, n_informative=5,
                           n_redundant=2, random_state=101)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.30, random_state=101)

# The L1 penalty needs a solver that supports it; name it explicitly rather
# than relying on the library default.
classifier = LogisticRegression(C=0.1, penalty='l1', solver='liblinear',
                                random_state=101)
classifier.fit(X_train, y_train)
print("Out-of-sample accuracy: %0.3f" % classifier.score(X_test, y_test))

selector = RandomizedLogisticRegression(n_resampling=300, random_state=101)
selector.fit(X_train, y_train)
# Count the kept features through the public `get_support()` mask instead of
# the private `_get_support_mask()`.
print("Variance selected: %i" % selector.get_support().sum())

# Refit the same classifier on the reduced feature set and score it again.
X_train_s = selector.transform(X_train)
X_test_s = selector.transform(X_test)
classifier.fit(X_train_s, y_train)
print("Out-of-sample accuracy: %0.3f" % classifier.score(X_test_s, y_test))

# Randomized lasso on a regression problem: print (feature index, score) pairs.
XX, yy = make_regression(n_samples=100, n_features=10, n_informative=4,
                         random_state=101)
rlasso = RandomizedLasso()
rlasso.fit(XX, yy)
print(list(enumerate(rlasso.scores_)))