def get_cascaded_sel_idx(high_th_year,
                         low_th_year,
                         feature_list,
                         set_feature,
                         sel_feature_num,
                         div_ratio=4):
    """Return the leading trace-ratio feature indices for a risk split.

    The year thresholds are converted to days (x365) and handed to
    ``helper.get_risk_group`` together with ``x``, ``c`` and ``s``.  Those
    three names, as well as ``self``, are not parameters of this function
    and must be resolvable from the surrounding scope.

    Args:
        high_th_year: Threshold, in years, passed on as the high-risk cut-off.
        low_th_year: Threshold, in years, passed on as the low-risk cut-off.
        feature_list: Not used by this function.
        set_feature: Column selector applied to the training matrix before
            ranking; skipped when it has length zero.
        sel_feature_num: Number of leading indices to return.  ``0`` means
            "derive the count from the column count and ``div_ratio``".
        div_ratio: Divisor applied to the column count when
            ``sel_feature_num`` is ``0``.

    Returns:
        The first ``sel_feature_num`` (or ``int(columns / div_ratio)``)
        entries of whatever ``trace_ratio.trace_ratio(..., mode='index')``
        returns -- presumably feature indices ordered best-first; confirm
        against the trace_ratio implementation.
    """
    high_cutoff_days, low_cutoff_days = high_th_year * 365, low_th_year * 365

    groups = helper.get_risk_group(x, c, s, high_cutoff_days, low_cutoff_days)
    high_risk_group, low_risk_group = groups

    # Training data only -- no validation split is produced here.
    train_features, train_labels = helper.get_train(
        high_risk_group,
        low_risk_group,
        is_categori_y=False,
        seed=self.random_seed)

    # Optionally restrict ranking to a previously chosen subset of columns.
    if len(set_feature) > 0:
        train_features = train_features[:, set_feature]

    total_features = train_features.shape[1]
    n_keep = (int(max(sel_feature_num, total_features / div_ratio))
              if sel_feature_num == 0 else sel_feature_num)

    ranking = trace_ratio.trace_ratio(
        train_features, train_labels, mode='index')
    return ranking[:n_keep]
def trace():
    """Run trace-ratio selection, print its size and wall time, then save it.

    Reads the file-level names ``data``, ``labels``, ``treshold`` and
    ``header``.  The result is handed to ``transform_and_save`` under the
    label ``"Trace_ratio"`` only when it is shorter than ``header``.
    """
    started = datetime.datetime.now()
    selected = trace_ratio.trace_ratio(
        data, labels, mode="index", n_selected_features=treshold)
    elapsed = datetime.datetime.now() - started

    print("Trace ratio")
    print(len(selected))
    print(f"cas: {elapsed}")
    print('\n')

    # Skip saving when the selection did not shrink below the header length.
    if len(selected) < len(header):
        transform_and_save(selected, "Trace_ratio")
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the first ``sel_feature_num`` trace-ratio indices for a risk split.

    The year thresholds are converted to days (x365) and passed to
    ``helper.get_risk_group`` together with ``x``, ``c`` and ``s``.  Those
    three names, as well as ``self``, are not parameters of this function
    and must be resolvable from the surrounding scope.

    Args:
        high_th_year: Threshold, in years, passed on as the high-risk cut-off.
        low_th_year: Threshold, in years, passed on as the low-risk cut-off.
        feature_list: Not used by this function; kept so existing callers
            keep working.
        sel_feature_num: Number of leading entries to return.

    Returns:
        ``trace_ratio.trace_ratio(..., mode='index')[:sel_feature_num]`` --
        presumably feature indices ordered best-first; confirm against the
        trace_ratio implementation.
    """
    high_risk_th = high_th_year * 365
    low_risk_th = low_th_year * 365
    high_risk_group, low_risk_group = helper.get_risk_group(
        x, c, s, high_risk_th, low_risk_th)

    # Training data only -- no validation split is produced here.
    trn_x, trn_y = helper.get_train(
        high_risk_group,
        low_risk_group,
        is_categori_y=False,
        seed=self.random_seed)

    sort_idx = trace_ratio.trace_ratio(trn_x, trn_y, mode='index')
    return sort_idx[:sel_feature_num]
def test_trace_ratio():
    """Smoke-test ``trace_ratio.trace_ratio`` on a synthetic two-class dataset.

    Builds a 200x20 classification problem and checks that the selector runs
    to completion and hands back a result.
    """
    from sklearn.datasets import make_classification

    num_fea = 5  # number of features requested from the selector

    # Fixed seed so a failure is reproducible from run to run.
    X, y = make_classification(n_samples=200,
                               n_features=20,
                               n_informative=5,
                               n_redundant=5,
                               n_classes=2,
                               random_state=0)
    X = X.astype(float)

    result = trace_ratio.trace_ratio(X, y, n_selected_features=num_fea)

    # The previous form, ``assert (result, True)``, asserted a non-empty
    # tuple and therefore could never fail.  Assert on the result itself.
    # NOTE(review): the exact return shape of trace_ratio is not visible
    # here; tighten this once it is confirmed (e.g. check the length).
    assert result is not None
def main():
    """Evaluate trace-ratio feature selection on COIL20 with a linear SVM.

    Loads ``../data/COIL20.mat``, runs 10-fold cross-validation, selects
    features on each training fold with ``style='fisher'``, fits a
    ``LinearSVC`` on the selected columns and prints the accuracy averaged
    over the folds.
    """
    n_folds = 10
    num_fea = 100  # number of features kept per fold

    # Load the data matrix and take the first label column.
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)
    y = mat['Y'][:, 0]
    n_samples, _n_features = X.shape

    # NOTE(review): ``cross_validation.KFold(n, n_folds=...)`` looks like the
    # legacy scikit-learn API -- confirm the installed version supports it.
    folds = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    clf = svm.LinearSVC()

    accuracy_sum = 0
    for train, test in folds:
        # Rank features using the training fold only.
        idx, _feature_score, _subset_score = trace_ratio.trace_ratio(
            X[train], y[train], num_fea, style='fisher')

        # Keep the selected columns for every sample, then fit and score.
        reduced = X[:, idx[:num_fea]]
        clf.fit(reduced[train], y[train])
        predicted = clf.predict(reduced[test])
        accuracy_sum += accuracy_score(y[test], predicted)

    # Mean accuracy across all folds.
    print('Accuracy:', float(accuracy_sum) / n_folds)
def trace_ratio_FS(X, train_index, y_train):
    """Score every column of ``X`` with trace ratio on the training rows.

    Args:
        X: Two-dimensional data matrix; its second dimension is used as the
            number of features requested from the selector.
        train_index: Row selector applied to ``X`` before ranking.
        y_train: Labels matching ``X[train_index]``.

    Returns:
        A ``(feature_idx, feature_score)`` tuple, i.e. the first two of the
        three values returned by ``trace_ratio.trace_ratio``.
    """
    _n_rows, n_columns = X.shape
    training_rows = X[train_index]
    feature_idx, feature_score, _subset_score = trace_ratio.trace_ratio(
        training_rows, y_train, n_columns, style='fisher')
    return feature_idx, feature_score
elif fsMethod == 'JMI': featSelected, J_CMI, MIfy = jmi(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'MRMR': featSelected, J_CMI, MIfy = mrmr(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'MIM': featSelected, J_CMI, MIfy = mim(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'MRI': featSelected = mri(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'MIFS': featSelected, J_CMI, MIfy = mifs(X_dis,Y,n_selected_features=maxNumSelFeatures,beta=1) elif fsMethod == 'CIFE': featSelected, J_CMI, MIfy = cife(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'CMIM': featSelected, J_CMI, MIfy = cmim(X_dis,Y,n_selected_features=maxNumSelFeatures) elif fsMethod == 'trace_ratio': featSelected, feature_score, subset_score = trace_ratio(X_dis,Y,n_selected_features=maxNumSelFeatures) else: print('The feature selection method %s is not supported' %fsMethod) assert(False) time1 = time.time() filename = "results/sel_features/selFeatures_%s_dataset_%s.csv" %(fsMethod,datName) if fsMethod == 'VMIrm' or fsMethod == 'VMIgm' or fsMethod == 'VMIin' \ or fsMethod == 'JMIrm' or fsMethod == 'MRMRrm' or fsMethod == 'RMRMRrm': featSelected = genfromtxt(filename, delimiter=',',dtype=int) print(featSelected) else: fileSaving(filename, featSelected, 'w') print('Features selected by %s on dataset %s:' % (fsMethod, datName)) print(featSelected)
def trace(train, test, K):
    """Return the trace-ratio feature indices computed on ``train``.

    Args:
        train: Indexable pair; ``train[0]`` and ``train[1]`` are passed to
            the selector as its first two arguments.
        test: Not used by this function.
        K: Number of features requested from the selector.

    Returns:
        The first of the three values returned by
        ``trace_ratio.trace_ratio(..., style='fisher')``.
    """
    features, labels = train[0], train[1]
    selected_idx, _feature_score, _subset_score = trace_ratio.trace_ratio(
        features, labels, K, style='fisher')
    return selected_idx