Exemple #1
0
# Load the bank-loan data set and split it into features and target.
# NOTE(review): the slicing below treats the first eight columns as features
# and the ninth as the label — confirm against the spreadsheet layout.
data = pd.read_excel('../../data1/bankloan.xls')
x = data.iloc[:, :8]
y = data.iloc[:, 8]
y_data = y.values

# Feature selection: fit the selector on the full feature frame.
# NOTE(review): RLR is presumably sklearn's RandomizedLogisticRegression
# imported under an alias outside this snippet — verify.
rlr = RLR()
rlr.fit(x, y)
# Use the public selector API; _get_support_mask() is a private hook that
# get_support() wraps and returns unchanged when no indices are requested.
result = rlr.get_support()

print(result)  # boolean mask, one entry per feature column
# Per-feature scores are presumably available via the rlr.scores_ attribute
# (an attribute, not a callable).
result = list(result)
print('通过随机逻辑回归模型筛选特征结束。')
print('有效特征为: %s' % ','.join(x.columns[result]))
# Keep only the selected columns as the training matrix.
x_data = x[x.columns[result]].values
print(x_data)

# Fit the logistic regression on the selected features only.
lr = LR()
lr.fit(x_data, y_data)
print('逻辑回归模型结束。')
# NOTE: the score below is computed on the same data used for fitting, so it
# is a training accuracy rather than a held-out estimate.
print('逻辑回归模型的正确率为: %s' % lr.score(x_data, y_data))  # 0.8142857142857143
Exemple #2
0
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection
from sklearn.datasets import make_classification
from sklearn.linear_model import RandomizedLogisticRegression
from sklearn.linear_model import RandomizedLasso
from sklearn.datasets import make_regression
# Synthetic classification problem: 100 samples with 100 features, of which
# 5 are informative and 2 are redundant.
X, y = make_classification(n_samples=100,
                           n_features=100,
                           n_informative=5,
                           n_redundant=2,
                           random_state=101)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.30, random_state=101)

# Baseline: L1-penalised logistic regression on all features.
# The solver is named explicitly because the L1 penalty is only supported by
# some solvers; relying on the library default makes the script depend on
# the installed scikit-learn version.
classifier = LogisticRegression(C=0.1, penalty='l1', solver='liblinear',
                                random_state=101)
classifier.fit(X_train, y_train)
print("Out-of-sample accuracy: %0.3f" % classifier.score(X_test, y_test))

# Feature selection with randomized logistic regression, fitted on the
# training split only.
selector = RandomizedLogisticRegression(n_resampling=300, random_state=101)
selector.fit(X_train, y_train)
# get_support() is the public accessor for the boolean selection mask;
# summing the mask counts the selected features.
print("Variables selected: %i" % selector.get_support().sum())

# Re-fit the same classifier on the reduced feature set and re-evaluate.
X_train_s = selector.transform(X_train)
X_test_s = selector.transform(X_test)
classifier.fit(X_train_s, y_train)
print("Out-of-sample accuracy: %0.3f" % classifier.score(X_test_s, y_test))

# Synthetic regression problem: 100 samples, 10 features, 4 informative.
XX, yy = make_regression(n_samples=100,
                         n_features=10,
                         n_informative=4,
                         random_state=101)
rlasso = RandomizedLasso()
rlasso.fit(XX, yy)
# Print (feature index, score) pairs from the fitted randomized lasso.
print(list(enumerate(rlasso.scores_)))