# Step 1: keep only the features whose standard deviation exceeds 0.5.
large_std_features_index = [
    i for i in range(len(features_std)) if features_std[i] > 0.5
]
X2 = X[:, large_std_features_index]

# Step 2: variable selection with a lasso-constrained (randomized) logistic
# regression.  Search a hold-out split for the best regularization parameter C.
# NOTE(review): RandomizedLogisticRegression was deprecated in scikit-learn
# 0.19 and removed in 0.21 -- confirm the pinned sklearn version supports it.
C_candidates = list(range(1, 101))
auc_list = []
for Ci in C_candidates:
    X21, X22, y21, y22 = model_selection.train_test_split(X2, y, test_size=0.2)
    # fit_transform both fits the selector and projects the training rows;
    # a separate preceding fit() call would just be a redundant refit.
    lr = RandomizedLogisticRegression(C=Ci)
    X_new = lr.inverse_transform(lr.fit_transform(X21, y21))

    # Columns of the inverse-transformed matrix that are not numerically all
    # zero correspond to the features the selector retained.
    zero_columns = np.sum(np.abs(X_new), axis=0)
    nonzero_columns_index = [
        i for i in range(len(zero_columns)) if zero_columns[i] > 0.0001
    ]
    X3 = X21[:, nonzero_columns_index]

    # BUG FIX: evaluate a model that actually uses the candidate C and the
    # selected feature subset.  The original fit a default-C model on the full
    # X21, so every loop iteration measured the same thing and the search over
    # C was a no-op.
    lr_best = LogisticRegression(C=Ci)
    lr_best.fit(X3, y21)
    # Use the public predict_proba instead of the private _predict_proba_lr,
    # and score the validation rows restricted to the same selected columns.
    prob_predict = lr_best.predict_proba(X22[:, nonzero_columns_index])[:, 1]
    # BUG FIX: metrics.auc expects curve coordinates (x, y); for true labels
    # versus predicted scores the correct call is roc_auc_score.
    auc = metrics.roc_auc_score(y22, prob_predict)
    auc_list.append(auc)

# Pick the C whose hold-out AUC was highest.
best_C_position = auc_list.index(max(auc_list))
best_C = C_candidates[best_C_position]
# Useful sources: # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html#sklearn.linear_model.RandomizedLogisticRegression # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV from sklearn.linear_model import RandomizedLogisticRegression, LogisticRegression #, LogisticRegressionCV from sklearn.datasets import load_iris import numpy as np iris = load_iris() X, y = iris.data, iris.target print(X) print(y) ff_model = RandomizedLogisticRegression() # Finds best set of features X_new = ff_model.fit_transform(X, y) # Fit data and get transformed input rows print(X_new) print(X.shape) print(X_new.shape) print(X[0:4]) print(ff_model.transform( X[0:4])) # Transform the first 4 rows of data to get only best features model = LogisticRegression().fit( X_new, y) # Fit logistic regression with best features print(model.predict_proba(ff_model.transform( X[0:4]))) # predict probabilities for first 4 rows of data print(ff_model.inverse_transform(ff_model.transform( X[0:4]))) # Test inverse transforming arr = np.array([[1, 1, 1]]) print( ff_model.inverse_transform(arr) ) # Get original matrix structure with 1's only in columns of retained features.
"""Feature-selection walkthrough: RandomizedLogisticRegression on the iris data.

Selects a feature subset, trains a logistic regression on it, and prints
every intermediate so the transform/inverse_transform round-trip is visible.
"""
# Useful sources:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html#sklearn.linear_model.RandomizedLogisticRegression
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV

from sklearn.linear_model import RandomizedLogisticRegression, LogisticRegression #, LogisticRegressionCV
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
X, y = iris.data, iris.target
print(X)
print(y)

# Stability-selection model that decides which input columns to keep.
selector = RandomizedLogisticRegression()
X_selected = selector.fit_transform(X, y)  # fit, then project X onto the kept columns
print(X_selected)
print(X.shape)
print(X_selected.shape)

head = X[0:4]
print(head)
print(selector.transform(head))  # first 4 rows restricted to the kept columns

# Plain logistic regression trained on the reduced feature matrix.
clf = LogisticRegression().fit(X_selected, y)
print(clf.predict_proba(selector.transform(head)))  # class probabilities for the first 4 rows

print(selector.inverse_transform(selector.transform(head)))  # round-trip sanity check

indicator = np.array([[1, 1, 1]])
# Expanded back to the original width: ones appear only in the retained columns.
print(selector.inverse_transform(indicator))