# Step 1: keep only the features whose standard deviation exceeds 0.5
# (near-constant columns carry little discriminative information).
# NOTE(review): `features_std`, `X`, `y`, `model_selection`, `metrics`, `np`
# are assumed to be defined earlier in the file — confirm.
large_std_features_index = [
    i for i in range(len(features_std)) if features_std[i] > 0.5
]

X2 = X[:, large_std_features_index]

# Step 2: variable selection with a Lasso-constrained randomized logistic
# regression, then pick the best regularization strength C on a validation set.
#
# Fixes vs. the original:
#   * the train/validation split is made ONCE, outside the loop, so every
#     candidate C is scored on the same data (re-splitting per iteration mixed
#     split noise into the AUC comparison);
#   * the final model is fit on the SELECTED columns (X3), not on all of X21,
#     and the validation rows are reduced to the same columns before scoring;
#   * AUC is computed with metrics.roc_auc_score(y_true, y_score) — the old
#     metrics.auc(y22, prob, reorder=True) call treats labels/probabilities as
#     curve coordinates and returns a meaningless number;
#   * the public predict_proba replaces the private _predict_proba_lr.
X21, X22, y21, y22 = model_selection.train_test_split(X2, y, test_size=0.2)

C_candidates = list(range(1, 101))
auc_list = []
for Ci in C_candidates:
    lr = RandomizedLogisticRegression(C=Ci)  # stability-selection Lasso logistic
    # inverse_transform zero-fills the dropped columns, so a column that is
    # (almost) all zeros was eliminated by the selector.
    X_new = lr.inverse_transform(lr.fit_transform(X21, y21))
    column_mass = np.sum(np.abs(X_new), axis=0)
    nonzero_columns_index = [
        i for i in range(len(column_mass)) if column_mass[i] > 0.0001
    ]
    X3 = X21[:, nonzero_columns_index]
    lr_best = LogisticRegression()
    lr_best.fit(X3, y21)  # fit on the selected features only
    # Score on the validation rows restricted to the same selected columns.
    prob_predict = lr_best.predict_proba(X22[:, nonzero_columns_index])[:, 1]
    auc_list.append(metrics.roc_auc_score(y22, prob_predict))

best_C_position = auc_list.index(max(auc_list))
best_C = C_candidates[best_C_position]
# Example #2 — scrape artifact ("Exemple #2" / stray "0") commented out so the file parses.
# Useful sources:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html#sklearn.linear_model.RandomizedLogisticRegression
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV

from sklearn.linear_model import RandomizedLogisticRegression, LogisticRegression  #, LogisticRegressionCV
from sklearn.datasets import load_iris
import numpy as np

# Demo: RandomizedLogisticRegression as a feature selector on the iris data.
iris_bunch = load_iris()
X, y = iris_bunch.data, iris_bunch.target
print(X)
print(y)

# Fit the stability-selection model and keep only the retained columns.
selector = RandomizedLogisticRegression()
X_selected = selector.fit_transform(X, y)
print(X_selected)
print(X.shape)
print(X_selected.shape)

head = X[0:4]
print(head)
print(selector.transform(head))  # first 4 rows reduced to the kept features

# Logistic regression trained on the reduced feature matrix.
clf = LogisticRegression().fit(X_selected, y)
print(clf.predict_proba(selector.transform(head)))

# Round-trip check: transform then inverse_transform pads dropped columns with 0.
print(selector.inverse_transform(selector.transform(head)))

# A row of ones maps back to a full-width row with 1's only in the
# retained-feature columns (assumes 3 features were kept — TODO confirm).
ones_row = np.array([[1, 1, 1]])
print(selector.inverse_transform(ones_row))
# Useful sources:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html#sklearn.linear_model.RandomizedLogisticRegression
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV

from sklearn.linear_model import RandomizedLogisticRegression, LogisticRegression #, LogisticRegressionCV
from sklearn.datasets import load_iris
import numpy as np

# Same walkthrough in a compact form: select iris features, then classify.
data_bunch = load_iris()
features, labels = data_bunch.data, data_bunch.target
print(features)
print(labels)
picker = RandomizedLogisticRegression()  # finds the best feature subset
reduced = picker.fit_transform(features, labels)  # fit and reduce in one step
print(reduced)
print(features.shape)
print(reduced.shape)
print(features[0:4])
print(picker.transform(features[0:4]))  # first 4 rows, kept features only
logreg = LogisticRegression().fit(reduced, labels)  # classify on kept features
print(logreg.predict_proba(picker.transform(features[0:4])))
print(picker.inverse_transform(picker.transform(features[0:4])))  # round-trip
# Ones expand to a full-width row with 1's in the retained columns only.
print(picker.inverse_transform(np.array([[1, 1, 1]])))