def ll_l21_FS(X_train, y, train_index):
    """Rank features with ll_l21 proximal gradient descent on the training rows.

    Args:
        X_train: Feature matrix handed straight to
            ``ll_l21.proximal_gradient_descent``.
            NOTE(review): presumably already restricted to the rows in
            ``train_index`` -- confirm against the caller.
        y: Label vector covering all samples; it is expanded with
            ``construct_label_matrix_pan`` before being indexed.
        train_index: Index used to pick the training rows of the label matrix.

    Returns:
        A tuple ``(idx, Weight)``: the output of ``feature_ranking(Weight)``
        and the weight matrix returned by the solver.
    """
    # Build the label matrix from the full label vector, then keep only the
    # training rows (indexed once and reused).
    Y_train = construct_label_matrix_pan(y)[train_index]

    # Only the weight matrix is needed; the solver's other two return values
    # are discarded.
    Weight, _, _ = ll_l21.proximal_gradient_descent(
        X_train, Y_train, 0.1, verbose=False)

    idx = feature_ranking(Weight)
    return (idx, Weight)
def main():
    """Evaluate ll_l21 feature selection with a linear SVM on COIL20.

    Loads ``../data/COIL20.mat``, keeps the first 200 samples, and runs
    k-fold cross-validation. In every fold the feature weights are learned on
    the training rows, the top ``num_fea`` ranked features are kept, a
    ``svm.LinearSVC`` is trained on them and scored on the held-out rows.
    The mean accuracy over all folds is printed.
    """
    n_folds = 10    # used both for the split and for averaging
    num_fea = 100   # number of selected features

    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]            # label
    Y = construct_label_matrix_pan(y)

    # Restrict the experiment to the first 200 samples. ``y`` is left
    # untruncated: the fold indices are derived from the truncated ``X``, so
    # ``y[train]`` / ``y[test]`` still line up with the kept rows.
    X = X[:200]
    Y = Y[:200]

    # split data into folds
    ss = KFold(n_splits=n_folds)

    # perform evaluation on classification task
    clf = svm.LinearSVC()    # linear SVM
    correct = 0
    for train, test in ss.split(X):
        # obtain the feature weight matrix from the training rows only
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X[train], Y[train], 0.1, verbose=False)

        # rank the features according to the learned weights
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train on the training rows, predict the held-out rows
        clf.fit(selected_features[train], y[train])
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test data
        correct = correct + accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    print('Accuracy:', float(correct) / n_folds)
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the indices of the top-ranked features for a risk-group split.

    The two year thresholds are converted to days (x 365) and handed to
    ``helper.get_risk_group``; the resulting groups are turned into a
    training set by ``helper.get_train`` (no validation set), an ll_l21
    model is fitted on it, and the first ``sel_feature_num`` entries of
    ``feature_ranking`` applied to the learned weights are returned.

    NOTE(review): ``x``, ``c``, ``s`` and ``self`` are not parameters of this
    function; they are presumably resolved from an enclosing scope that is
    not visible here -- confirm. ``feature_list`` is accepted but not used in
    this body.

    Args:
        high_th_year: High-risk threshold in years.
        low_th_year: Low-risk threshold in years.
        feature_list: Unused here.
        sel_feature_num: Number of leading ranked indices to return.

    Returns:
        The first ``sel_feature_num`` entries of the feature ranking.
    """
    # thresholds are given in years but the helper is called with days
    threshold_high_days = high_th_year * 365
    threshold_low_days = low_th_year * 365

    groups = helper.get_risk_group(
        x, c, s, threshold_high_days, threshold_low_days)
    high_risk_group, low_risk_group = groups

    # without validation set
    train_pair = helper.get_train(
        high_risk_group,
        low_risk_group,
        is_categori_y=False,
        seed=self.random_seed)
    trn_x, trn_y = train_pair

    # keep only the weight matrix; the other two solver outputs are unused
    weight_matrix, _, _ = ll_l21.proximal_gradient_descent(
        trn_x, trn_y, z=0.01, mode='raw')

    ranking = feature_ranking(weight_matrix)
    return ranking[:sel_feature_num]
def main():
    """Evaluate ll_l21 feature selection with a linear SVM on COIL20.

    Loads ``../data/COIL20.mat`` and runs shuffled k-fold cross-validation.
    In every fold the feature weights are learned on the training rows, the
    top ``num_fea`` ranked features are kept, a ``svm.LinearSVC`` is trained
    on them and scored on the held-out rows. The mean accuracy over all folds
    is printed.
    """
    n_folds = 10    # used both for the split and for averaging
    num_fea = 100   # number of selected features

    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]            # label
    Y = construct_label_matrix_pan(y)
    n_samples = X.shape[0]        # number of samples

    # split data into folds
    # NOTE(review): this is the legacy ``cross_validation.KFold`` call style
    # (sample count passed in, object iterated directly); it is kept so the
    # block needs no import that the file may not already have.
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    clf = svm.LinearSVC()    # linear SVM
    correct = 0
    for train, test in ss:
        # obtain the feature weight matrix from the training rows only
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X[train], Y[train], 0.1, verbose=False)

        # rank the features according to the learned weights
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train on the training rows, predict the held-out rows
        clf.fit(selected_features[train], y[train])
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test data
        correct = correct + accuracy_score(y[test], y_predict)

    # Output the average classification accuracy over all folds. A single
    # pre-formatted string prints the same text whether ``print`` is the
    # Python 2 statement or the Python 3 function.
    print('Accuracy: %s' % (float(correct) / n_folds))
def fit(self, X, y):
    """Run the configured feature-selection method and return its indices.

    The method family is read from ``self.tp`` and the concrete method from
    ``self.name``; hyper-parameters come from ``self.params``. When the
    family or the method name is not recognised, an empty list is returned.

    Args:
        X: Feature matrix passed to the selected method.
        y: Target passed to the selected method. For the ``'SLB'`` family it
            is first converted with ``construct_label_matrix``.

    Returns:
        The index array produced by the selected method, or ``[]`` when
        nothing matched.
    """
    if self.tp == 'ITB':
        if self.name == 'MRMR':
            return MRMR.mrmr(
                X, y, n_selected_features=self.params['num_feats'])
        return []

    if self.tp == 'filter':
        method = self.name
        if method == 'MI':
            relevance = mutual_info_classif(
                X, y, n_neighbors=self.params['n_neighbors'])
            return np.argsort(relevance)[::-1]
        if method == 'Fisher':
            # score each feature on the training set, then rank the scores
            return fisher_score.feature_ranking(
                fisher_score.fisher_score(X, y))
        if method == 'Relief':
            return reliefF.feature_ranking(
                reliefF.reliefF(X, y, k=self.params['k']))
        return []

    if self.tp == 'wrapper':
        fitted_model = self.model.fit(X, y)
        selector = SelectFromModel(fitted_model, prefit=True)
        return selector.get_support(indices=True)

    if self.tp != 'SLB':
        return []

    # one-hot-encode on target
    label_matrix = construct_label_matrix(y)
    method = self.name

    if method == 'SMBA':
        cfg = self.params
        forwarded = ('alpha', 'norm_type', 'verbose', 'thr', 'max_iter',
                     'affine', 'normalize', 'step', 'PCA', 'GPU', 'device')
        scba = fs.SCBA(data=X, **{key: cfg[key] for key in forwarded})
        nrmInd, sInd, repInd, _ = scba.admm()
        # 'type_indices' picks which of the returned index sets to use;
        # anything other than 'nrmInd' / 'repInd' falls back to sInd
        wanted = cfg['type_indices']
        if wanted == 'nrmInd':
            return nrmInd
        return repInd if wanted == 'repInd' else sInd

    if method == 'RFS':
        return feature_ranking(
            RFS.rfs(X, label_matrix, gamma=self.params['gamma']))

    if method in ('ll_l21', 'ls_l21'):
        # obtain the feature weight matrix, then rank the features by it
        solver = ll_l21 if method == 'll_l21' else ls_l21
        weights, _, _ = solver.proximal_gradient_descent(
            X, label_matrix, z=self.params['z'], verbose=False)
        return feature_ranking(weights)

    if method in ('LASSO', 'EN'):
        if method == 'LASSO':
            estimator = Lasso(alpha=self.params['alpha'], positive=True)
        else:
            # elastic net L1
            estimator = ElasticNet(
                alpha=self.params['alpha'], l1_ratio=1, positive=True)
        coef = estimator.fit(X, label_matrix).coef_
        # with a 2-D coefficient array only its first row is ranked
        coeff = coef if coef.ndim == 1 else np.asarray(coef[0, :])
        return np.argsort(-coeff)

    return []
def main():
    """Cross-validate ll_l21 feature selection with logistic regression.

    Reads an image-feature table and a label table from hard-coded paths,
    then runs 5-fold cross-validation. In every fold the feature weights are
    learned on the training rows, the top ``num_fea`` ranked features are
    kept, a class-balanced ``LogisticRegression`` is fitted on them, and the
    confusion matrix on the held-out rows is printed.
    """
    # imported locally, as in the original body, so the block does not rely
    # on a module-level import that may not exist
    from sklearn.metrics import confusion_matrix

    sequence_name = 'D3'
    feature_model_num = 0
    num_fea = 100  # number of selected features

    data_dir = ('/usr/luopengting/shareHoldersWithGPU_a/luopengting/workplace/python/pytorch/breast_cancer_lymph/data/'
                + sequence_name)
    img_feature_path = data_dir + '/model_' + str(feature_model_num) + '_img_feature.txt'
    img_label_path = data_dir + '/labels.txt'

    feature_frame = pd.read_csv(img_feature_path, sep=' ', header=None)
    label_frame = pd.read_csv(img_label_path, sep='\t', header=None)

    # ``DataFrame.as_matrix`` no longer exists in current pandas; ``.values``
    # yields the same ndarray and is available in old and new versions.
    X_train = feature_frame.values
    y_train = label_frame.values[:, 0]  # first column holds the labels

    ss = KFold(n_splits=5)

    for train_idx, test_idx in ss.split(X_train):
        X = X_train[train_idx]
        y = y_train[train_idx]

        # obtain the feature weight matrix
        # NOTE(review): ``y`` is the raw 1-D label column, not a one-hot
        # label matrix -- confirm proximal_gradient_descent accepts that.
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X, y, 0.1, verbose=False)

        # rank the features according to the learned weights
        idx = feature_ranking(Weight)
        top_features = idx[0:num_fea]

        # obtain the training data restricted to the selected features
        selected_features = X[:, top_features]

        classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001,
                                        C=1.0, fit_intercept=True,
                                        intercept_scaling=1,
                                        class_weight='balanced',
                                        random_state=None, solver='liblinear',
                                        max_iter=100, multi_class='ovr',
                                        verbose=0, warm_start=False, n_jobs=1)
        classifier.fit(selected_features, y)

        # evaluate on the held-out rows, using the same selected features
        X_test = X_train[test_idx][:, top_features]
        Y_test = y_train[test_idx]
        Y_pred = classifier.predict(X_test)

        # the result gets its own name so the imported function is not
        # shadowed
        fold_confusion = confusion_matrix(Y_test, Y_pred)
        print(fold_confusion)
def fit(self, X, y):
    """Run the configured feature-selection method and return its indices.

    The concrete method is chosen by ``self.name`` (``'MRMR'`` additionally
    requires ``self.tp == 'ITB'``); hyper-parameters come from
    ``self.params``. When no method matches, an empty list is returned
    instead of failing on an unassigned result.

    Args:
        X: Feature matrix passed to the selected method.
        y: Target passed to the selected method. ``'RFS'``, ``'ll_l21'`` and
            ``'ls_l21'`` receive ``construct_label_matrix(y)`` instead.

    Returns:
        The index array produced by the selected method, or ``[]`` when
        nothing matched.
    """
    # Default result: without it an unrecognised selector would reach the
    # final ``return`` with ``idx`` never assigned.
    idx = []
    name = self.name

    if name == 'LASSO':
        fitted = Lasso(alpha=self.params['alpha'], positive=True).fit(X, y)
        # with a 2-D coefficient array only its first row is ranked
        if fitted.coef_.ndim == 1:
            coeff = fitted.coef_
        else:
            coeff = np.asarray(fitted.coef_[0, :])
        idx = np.argsort(-coeff)

    elif name == 'EN':
        # elastic net L1
        fitted = ElasticNet(
            alpha=self.params['alpha'], l1_ratio=1, positive=True).fit(X, y)
        if fitted.coef_.ndim == 1:
            coeff = fitted.coef_
        else:
            coeff = np.asarray(fitted.coef_[0, :])
        idx = np.argsort(-coeff)

    elif name == 'RFS':
        W = RFS.rfs(X, construct_label_matrix(y), gamma=self.params['gamma'])
        idx = feature_ranking(W)

    elif name == 'll_l21':
        # obtain the feature weight matrix, then rank the features by it
        W, _, _ = ll_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        idx = feature_ranking(W)

    elif name == 'ls_l21':
        # obtain the feature weight matrix, then rank the features by it
        W, _, _ = ls_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        idx = feature_ranking(W)

    elif name == 'MRMR':
        # MRMR only runs for the 'ITB' selector type
        if self.tp == 'ITB':
            idx = MRMR.mrmr(
                X, y, n_selected_features=self.params['num_feats'])

    elif name == 'Relief':
        score = reliefF.reliefF(X, y, k=self.params['k'])
        idx = reliefF.feature_ranking(score)

    elif name == 'MI':
        relevance = mutual_info_classif(
            X, y, n_neighbors=self.params['n_neighbors'])
        # reversed argsort: largest mutual information first
        idx = np.argsort(relevance)[::-1]

    return idx