def main():
    """Evaluate RFS feature selection on COIL20 with 10-fold CV and a linear SVM.

    Loads ../data/COIL20.mat, ranks features with RFS on each training fold,
    keeps the top `num_fea` features, trains a LinearSVC, and prints the
    per-fold and average test accuracy.
    """
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    Y = construct_label_matrix(y)
    n_samples, n_features = X.shape

    # split data into 10 folds
    # FIX: sklearn.cross_validation was removed in scikit-learn 0.20; use
    # model_selection.KFold (imported locally to leave top-of-file imports alone).
    from sklearn import model_selection
    kf = model_selection.KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100           # number of selected features
    clf = svm.LinearSVC()   # linear SVM

    correct = 0
    for train, test in kf.split(X):
        # obtain the feature weight matrix on the training fold only
        Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)
        # rank features according to the weight matrix
        idx = feature_ranking(Weight)
        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]
        # train a classification model with the selected features on the training split
        clf.fit(selected_features[train], y[train])
        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])
        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        print(acc)  # FIX: was a Python-2 print statement
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
selected_fea_test = X_test[:, idx[0:num_features]] clf.fit(selected_fea_train, y_train) acc.append(accuracy_score(y_test, clf.predict(selected_fea_test))) # lasso lasso = Lasso(alpha=0.01, random_state=random_state) lasso.fit(X_train, y_train) weights = lasso.coef_.T idx = chi_square.feature_ranking(abs(weights)) selected_fea_train = X_train[:, idx[0:num_features]] selected_fea_test = X_test[:, idx[0:num_features]] clf.fit(selected_fea_train, y_train) acc.append(accuracy_score(y_test, clf.predict(selected_fea_test))) # rfs weights = RFS.rfs(X_train, construct_label_matrix(y_train), gamma=0.01) idx = sparse_learning.feature_ranking(weights) selected_fea_train = X_train[:, idx[0:num_features]] selected_fea_test = X_test[:, idx[0:num_features]] clf.fit(selected_fea_train, y_train) acc.append(accuracy_score(y_test, clf.predict(selected_fea_test))) # sgl idx_group = np.array([[1, 16, np.sqrt(16)], [17, 28, np.sqrt(12)], [29, 60, np.sqrt(32)], [61, 160, np.sqrt(100)]]).T idx_group = idx_group.astype(int) weights, _, _ = group_fs.group_fs(X_train, y_train, 0.01,
import numpy as np
import pandas as pd  # FIX: pd.read_csv is used below but pandas was never imported
from sklearn.preprocessing import scale
from feature_selection.mfsicc_plot import MFSICC
from skfeature.utility.sparse_learning import construct_label_matrix
from feature_selection.RFS import rfs
import warnings

warnings.filterwarnings("ignore")

# Dataset to evaluate; the string block below lists the other available files.
dataset = 'PHM09_Low_COMB.csv'
'''
CWRU_HP1_COMB.csv
CWRU_HP2_COMB.csv
CWRU_HP3_COMB.csv
PHM09_High_COMB.csv
PHM09_Low_COMB.csv
'''

# Feature-group assignment: first column of Groups.csv
file_group = '..//features//Groups.csv'
groups = np.array(pd.read_csv(file_group))[:, 0]

# Feature matrix = all but the last column (standardized); labels = last column.
file_path = '..//features//' + dataset
data = pd.read_csv(file_path)
features = np.array(data.iloc[:, :-1])
features = scale(features)
labels = np.array(data.iloc[:, -1])
labels = labels.astype(int)

# NOTE(review): random_state=1.1 is a float — looks like a typo for an int
# seed; confirm against MFSICC's expected parameter type.
mfsicc = MFSICC(lamb1=0.1, alpha=0.5, lamb2=0.1, random_state=1.1, n_iter=30)
mfsicc.fit(features, labels, groups)

W, objs = rfs(features, construct_label_matrix(labels), gamma=0.1)
print(mfsicc.get_objectives())
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

# load data
X = np.load('Lymph_x.npy')
y = np.load('Lymph_y.npy')
X = X.astype(float)
# NOTE(review): construct_label_matrix, RFS and feature_ranking are used below
# but not imported in this chunk — confirm they are imported elsewhere in the file.
Y = construct_label_matrix(y)
n_samples, n_features = X.shape

# split data into stratified folds
# FIX: the old comment said "10 folds" while the code uses 13; the comment
# now matches the code (behavior unchanged).
cv = StratifiedKFold(n_splits=13)

# perform evaluation on classification task
num_fea = 11    # number of selected features

for train, test in cv.split(X, y):
    # obtain the feature weight matrix on the training fold
    Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)
    # sort the feature scores in an ascending order according to the feature scores
    idx = feature_ranking(Weight)
    # FIX: use num_fea instead of the duplicated hard-coded 11 (same value)
    X_resampled = X[:, idx[0:num_fea]]
def fit(self, X, y):
    """Run the configured feature-selection algorithm on (X, y).

    Dispatches on ``self.tp`` (method family) and ``self.name`` (algorithm);
    hyper-parameters are read from ``self.params``.  Returns the selected /
    ranked feature indices, or an empty list when no branch matches.
    """
    selected = []
    if self.tp == 'ITB':
        # information-theoretic: minimum-redundancy-maximum-relevance
        if self.name == 'MRMR':
            selected = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
    elif self.tp == 'filter':
        if self.name == 'Relief':
            relief_scores = reliefF.reliefF(X, y, k=self.params['k'])
            selected = reliefF.feature_ranking(relief_scores)
        if self.name == 'Fisher':
            # score each feature on the training set, then rank the
            # features in descending order of Fisher score
            fisher_scores = fisher_score.fisher_score(X, y)
            selected = fisher_score.feature_ranking(fisher_scores)
        if self.name == 'MI':
            # mutual information between each feature and y, highest first
            mi = mutual_info_classif(X, y, n_neighbors=self.params['n_neighbors'])
            selected = np.argsort(mi)[::-1]
    elif self.tp == 'wrapper':
        # fit the wrapped estimator and keep the features it deems important
        fitted = self.model.fit(X, y)
        selector = SelectFromModel(fitted, prefit=True)
        selected = selector.get_support(indices=True)
    elif self.tp == 'SLB':
        # sparse-learning-based methods work on a one-hot label matrix
        y = construct_label_matrix(y)
        if self.name == 'SMBA':
            solver = fs.SCBA(data=X,
                             alpha=self.params['alpha'],
                             norm_type=self.params['norm_type'],
                             verbose=self.params['verbose'],
                             thr=self.params['thr'],
                             max_iter=self.params['max_iter'],
                             affine=self.params['affine'],
                             normalize=self.params['normalize'],
                             step=self.params['step'],
                             PCA=self.params['PCA'],
                             GPU=self.params['GPU'],
                             device=self.params['device'])
            nrmInd, sInd, repInd, _ = solver.admm()
            which = self.params['type_indices']
            if which == 'nrmInd':
                selected = nrmInd
            elif which == 'repInd':
                selected = repInd
            else:
                selected = sInd
        if self.name == 'RFS':
            weight_mat = RFS.rfs(X, y, gamma=self.params['gamma'])
            selected = feature_ranking(weight_mat)
        if self.name == 'll_l21':
            # feature weight matrix, then rank features by its row norms
            weight_mat, _, _ = ll_l21.proximal_gradient_descent(
                X, y, z=self.params['z'], verbose=False)
            selected = feature_ranking(weight_mat)
        if self.name == 'ls_l21':
            weight_mat, _, _ = ls_l21.proximal_gradient_descent(
                X, y, z=self.params['z'], verbose=False)
            selected = feature_ranking(weight_mat)
        if self.name == 'LASSO':
            lasso_model = Lasso(alpha=self.params['alpha'], positive=True)
            fitted_lasso = lasso_model.fit(X, y)
            # coef_ is 1-D for a single target, 2-D otherwise
            if fitted_lasso.coef_.ndim == 1:
                lasso_coef = fitted_lasso.coef_
            else:
                lasso_coef = np.asarray(fitted_lasso.coef_[0, :])
            selected = np.argsort(-lasso_coef)
        if self.name == 'EN':
            # elastic net constrained to the pure-L1 limit (l1_ratio=1)
            enet_model = ElasticNet(alpha=self.params['alpha'], l1_ratio=1,
                                    positive=True)
            fitted_enet = enet_model.fit(X, y)
            if fitted_enet.coef_.ndim == 1:
                enet_coef = fitted_enet.coef_
            else:
                enet_coef = np.asarray(fitted_enet.coef_[0, :])
            selected = np.argsort(-enet_coef)
    return selected
def fit(self, X, y):
    """Run the configured feature-selection algorithm on (X, y).

    Dispatches on ``self.name`` (and ``self.tp`` for the information-theoretic
    branch); hyper-parameters come from ``self.params``.  Returns the ranked
    feature indices.

    FIX: ``idx`` is now initialised to ``[]`` so an unmatched name/tp
    combination returns an empty list instead of raising UnboundLocalError at
    ``return idx`` (this also matches the sibling selector's ``fit``).
    """
    idx = []
    if self.name == 'LASSO':
        LASSO = Lasso(alpha=self.params['alpha'], positive=True)
        y_pred_lasso = LASSO.fit(X, y)
        # coef_ is 1-D for a single target, 2-D otherwise
        if y_pred_lasso.coef_.ndim == 1:
            coeff = y_pred_lasso.coef_
        else:
            coeff = np.asarray(y_pred_lasso.coef_[0, :])
        idx = np.argsort(-coeff)  # largest coefficient first
    if self.name == 'EN':
        # elastic net in the pure-L1 limit (l1_ratio=1)
        enet = ElasticNet(alpha=self.params['alpha'], l1_ratio=1, positive=True)
        y_pred_enet = enet.fit(X, y)
        if y_pred_enet.coef_.ndim == 1:
            coeff = y_pred_enet.coef_
        else:
            coeff = np.asarray(y_pred_enet.coef_[0, :])
        idx = np.argsort(-coeff)
    if self.name == 'RFS':
        W = RFS.rfs(X, construct_label_matrix(y), gamma=self.params['gamma'])
        idx = feature_ranking(W)
    if self.name == 'll_l21':
        # obtain the feature weight matrix
        W, _, _ = ll_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        # sort the feature scores in an ascending order according to the feature scores
        idx = feature_ranking(W)
    if self.name == 'ls_l21':
        # obtain the feature weight matrix
        W, _, _ = ls_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        # sort the feature scores in an ascending order according to the feature scores
        idx = feature_ranking(W)
    if self.tp == 'ITB':
        if self.name == 'MRMR':
            idx = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
        if self.name == 'Relief':
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)
        if self.name == 'MI':
            idx = np.argsort(mutual_info_classif(
                X, y, n_neighbors=self.params['n_neighbors']))[::-1]
    return idx
def rfs(X, y, **kwargs):
    """
    Efficient and robust feature selection via joint l2,1-norms minimization:

        min_W ||X^T W - Y||_2,1 + gamma * ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in RFS (default 0.1)
        verbose: {boolean}
            True to display the objective function value each iteration

    Output
    ------
    scores: {numpy array}, shape (n_features,)
        per-feature importance: the squared l2-norm of each row of the learned
        weight matrix W (larger = more important).
        FIX: the old docstring claimed a weight matrix W of shape
        (n_samples, n_features) was returned, but the function returns
        per-feature scores.

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint
    l2,1-Norms Minimization" NIPS 2010.
    """
    # default gamma is 0.1 (FIX: the old comment said 1)
    gamma = kwargs.get('gamma', 0.1)
    verbose = kwargs.get('verbose', False)
    # FIX: a 'n_selected_features' kwarg used to be read here but was never
    # used; it is still silently accepted via **kwargs for compatibility.

    n_samples, n_features = X.shape
    Y = construct_label_matrix(y)

    # Stack A = [X, gamma*I] so the gamma*||W||_2,1 term is absorbed into one
    # joint l2,1 least-squares problem over the augmented unknown U.
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features + n_samples] = gamma * np.eye(n_samples)
    D = np.eye(n_features + n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^-1 A^T)^-1 Y
        D_inv = LA.inv(D)
        # small ridge term keeps (A D^-1 A^T) invertible / numerically stable
        temp = LA.inv(
            np.dot(np.dot(A, D_inv), A.T) + 1e-6 * np.eye(n_samples))
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))
        # stop once the objective value has converged
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]
    scores = (W * W).sum(1)
    return scores