def dimensionality_reduction(X_train, X_test, y_train, n_features, method):
    """Reduce the train/test feature matrices to `n_features` dimensions.

    Parameters
    ----------
    X_train, X_test : array-like, shape (n_samples, n_original_features)
        Training and test feature matrices.
    y_train : array-like
        Training labels, used by the supervised methods (ReliefF, LDA).
    n_features : int
        Number of features/components to keep.
    method : str
        One of "ReliefF", "LDA", "PCA", "KernelPCA".

    Returns
    -------
    tuple (X_train, X_test) with reduced dimensionality.

    Raises
    ------
    ValueError
        If `method` is not a supported name. Previously an unknown method
        silently returned the data unchanged, hiding typos in `method`.
    """
    if method == "ReliefF":
        # Score every feature on the training split only, then rank them.
        score = reliefF.reliefF(X_train, y_train)
        index = reliefF.feature_ranking(score)
        # Keep the top-n_features columns in both splits.
        X_train = X_train[:, index[0:n_features]]
        X_test = X_test[:, index[0:n_features]]
    elif method == "LDA":
        # Supervised projection; note LDA supports at most n_classes-1 components.
        lda = LDA(n_components=n_features)
        X_train = lda.fit_transform(X_train, y_train)
        X_test = lda.transform(X_test)
    elif method == "PCA":
        # Unsupervised projection fitted on the training split only.
        pca = PCA(n_components=n_features)
        X_train = pca.fit_transform(X_train)
        X_test = pca.transform(X_test)
        # explained_variance = pca.explained_variance_ratio_
    elif method == "KernelPCA":
        # Non-linear PCA with an RBF kernel.
        kpca = KernelPCA(n_components=n_features, kernel='rbf')
        X_train = kpca.fit_transform(X_train)
        X_test = kpca.transform(X_test)
    else:
        raise ValueError("unknown dimensionality reduction method: %r" % (method,))
    return (X_train, X_test)
def reliefPostProc(X, y):
    """Rank features with ReliefF and return the indices of the best ones.

    Returns a numpy array containing the column indices of the top
    ceil(sqrt(n_features)) features, ordered from highest to lowest score.
    """
    # BUGFIX: `n_feats` and `numFeatsFn` were referenced without ever being
    # defined, so this function raised NameError on every call. The
    # definitions below mirror the working sibling implementation in this
    # file (sqrt-of-feature-count selection rule).
    n_feats = len(X[0])
    numFeatsFn = lambda n: int(ceil(sqrt(n_feats)))
    scores = reliefF.reliefF(X, y)
    indexes = range(0, len(scores))
    # Pair each score with its feature index and sort best-score-first.
    pairedScores = zip(scores, indexes)
    pairedScores = sorted(pairedScores, reverse=True)
    # Keep only the feature indices of the top-ranked features.
    return np.array([eaPair[1] for eaPair in pairedScores][:numFeatsFn(n_feats)])
def reliefPostProc(X, y):
    """Rank features by ReliefF score and keep the best ceil(sqrt(n)) of them.

    Returns a numpy array of feature indices sorted from highest to lowest
    score; ties between equal scores resolve toward the larger index.
    """
    n_feats = len(X[0])
    # NOTE: the selector deliberately ignores its argument and always uses
    # the enclosing feature count.
    numFeatsFn = lambda n: int(ceil(sqrt(n_feats)))
    scores = reliefF.reliefF(X, y)
    # Order (index, score) pairs so the highest score comes first; on equal
    # scores the larger index wins, exactly like reverse-sorting
    # (score, index) tuples.
    ranked = sorted(enumerate(scores), key=lambda pair: (pair[1], pair[0]), reverse=True)
    top_indices = [pair[0] for pair in ranked][:numFeatsFn(n_feats)]
    return np.array(top_indices)
def relieff():
    """Run ReliefF on the module-level dataset, time it, and persist the
    selection when it actually reduced the feature count.

    Relies on module-level globals: `data`, `labels`, `treshold` (sic),
    `header`, and the helper `transform_and_save`.
    """
    before = datetime.datetime.now()
    # mode="index" makes reliefF return feature indices already ranked by score.
    result = reliefF.reliefF(data, labels, mode="index")
    after = datetime.datetime.now()
    print("relieff")
    # Keep only the top `treshold` ranked features.
    result = result[:treshold]
    print(len(result))
    print("cas: " + str(after - before))
    print('\n')
    # Save only when fewer features than the original header remain.
    if len(result) < len(header):
        transform_and_save(result, "ReliefF")
def getFeatureWeights(CBproblems, CBsolutions, metric='chi_square'):
    """Compute per-feature weights for a case base, min-max scaled to [0, 1].

    Parameters
    ----------
    CBproblems : array-like, shape (n_cases, n_features)
        Case-base problem descriptions.
    CBsolutions : array-like
        Case-base solutions, used as the target vector.
    metric : str
        'chi_square' or 'reliefF'.

    Returns
    -------
    weights : ndarray of normalised feature weights in [0, 1].

    Raises
    ------
    ValueError
        For an unsupported metric. Previously `weights` was simply left
        undefined and a NameError surfaced on the next line.
    """
    y = CBsolutions
    if metric == 'chi_square':
        weights = chi2(CBproblems, y)
    elif metric == 'reliefF':
        weights = reliefF.reliefF(CBproblems, y)
    else:
        raise ValueError("unsupported metric: %r" % (metric,))
    # Replace NaN/inf produced by degenerate features before normalising.
    weights = np.nan_to_num(weights)
    # Min-max scale to [0, 1]; renamed locals avoid shadowing the built-in
    # min/max functions.
    w_min = np.min(weights)
    w_max = np.max(weights)
    weights = np.subtract(weights, w_min)
    if w_max != w_min:
        # Guard the degenerate all-equal case, which previously divided by zero.
        weights = np.divide(weights, (w_max - w_min))
    return weights
def relieF(data):
    """Rank features with ReliefF on six data partitions and fuse the
    per-partition rankings into one consensus ranking.

    Each element of `data` is a matrix whose last column is the label.
    The six rankings are aggregated with instant-runoff voting and the
    result is returned as a list of ints.
    """
    per_fold_ranks = []
    for fold in range(6):
        # Last column holds the label; everything before it is a feature.
        features, labels = data[fold][:, :-1], data[fold][:, -1]
        ranking = reliefF.feature_ranking(reliefF.reliefF(features, labels))
        per_fold_ranks.append(samp(ranking.tolist()))
    aggregated = agg.instant_runoff(per_fold_ranks)
    return [int(item) for item in aggregated]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Select the top `sel_feature_num` feature indices via ReliefF on a
    training set built from high/low-risk groups.

    NOTE(review): `self`, `x`, `c` and `s` are not parameters here — this
    function appears to be nested inside a method and to close over them;
    confirm against the enclosing scope. `feature_list` is unused in this body.
    """
    # Convert the year thresholds to days.
    high_risk_th = high_th_year * 365
    low_risk_th = low_th_year * 365
    high_risk_group, low_risk_group = helper.get_risk_group(
        x, c, s, high_risk_th, low_risk_th)
    trn_x, trn_y = helper.get_train(
        high_risk_group, low_risk_group,
        is_categori_y=False, seed=self.random_seed)  # without validation set
    # mode='index' returns feature indices already sorted by ReliefF score.
    sort_idx = reliefF.reliefF(trn_x, trn_y, mode='index')
    return sort_idx[:sel_feature_num]
def main():
    """Evaluate a linear SVM with leave-one-out CV on nci9 after selecting
    the 100 best features ranked by ReliefF.

    NOTE(review): the feature ranking is computed once on the FULL dataset
    before the leave-one-out loop (see the commented-out per-fold code
    below), so the reported accuracy is optimistically biased by
    feature-selection leakage.
    """
    # load data
    mat = scipy.io.loadmat('../data/nci9.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features
    # split data into 10 folds
    #ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    ss = LeaveOneOut()
    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC(random_state=42)    # linear SVM
    # Rank all features by ReliefF score (computed on the whole dataset).
    score = reliefF.reliefF(X, y)
    idx = reliefF.feature_ranking(score)
    # Keep only the top num_fea columns.
    selected_features = X[:, idx[0:num_fea]]
    correct = 0
    y_pred = []
    for train, test in ss.split(X):
        # obtain the score of each feature on the training set
        #score = reliefF.reliefF(X[train], y[train])
        # rank features in descending order according to score
        #idx = reliefF.feature_ranking(score)
        # obtain the dataset on the selected features
        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])
        # predict the class labels of test data; each LOO fold holds one sample
        #y_predict = clf.predict(selected_features[test])
        y_pred.append(clf.predict(selected_features[test]))
        # obtain the classification accuracy on the test data
        #acc = accuracy_score(y[test], y_predict)
        #correct = correct + acc
    # output the average classification accuracy over all 10 folds
    #print('Accuracy:', float(correct)/10)
    print(accuracy_score(y, y_pred))
def run_fold(trial,P,X,y,method,dataset,parttype):
    """Run one cross-validation fold of the named feature-selection method
    and return the selected feature ranking plus the wall-clock cost.

    Python 2 module (print statements). `P` is a partition matrix whose
    column `trial` marks training rows with 1.

    Returns a dict: {'features': ranking/selection, 'cputime': seconds}.
    """
    print 'Obtaining features for %s %s %s fold: %2d' % (parttype,method,dataset,trial)
    n_samples, n_features = X.shape
    # Boolean mask of the training rows for this fold.
    train = P[:,trial] == 1
    trnX = X[train]
    trnY = y[train]
    start_time = time.time()
    # Score-based selectors: compute per-feature scores, then rank.
    if method == 'fisher':
        score = fisher_score.fisher_score(trnX,trnY)
        features = fisher_score.feature_ranking(score)
    elif method == 'chi2':
        score = chi_square.chi_square(trnX,trnY)
        features = chi_square.feature_ranking(score)
    elif method == 'relieff':
        score = reliefF.reliefF(trnX,trnY)
        features = reliefF.feature_ranking(score)
    # Information-theoretic selectors return the ordering directly.
    elif method == 'jmi':
        features = JMI.jmi(trnX,trnY, n_selected_features=n_features)
    elif method == 'mrmr':
        features = MRMR.mrmr(trnX,trnY,n_selected_features=n_features)
    elif method == 'infogain':
        features = MIM.mim(trnX,trnY,n_selected_features=n_features)
    elif method == 'svmrfe':
        features = svmrfe(trnX,trnY)
    elif method == 'hdmr':
        # HDMR needs a precomputed Sobol sequence; 'L' = Legendre basis, p=3.
        sobol_set_all = scipy.io.loadmat('sobol_set.mat')
        sobol_set = sobol_set_all['sobol_set']
        sobol_set = sobol_set.astype(float)
        params = {'sobol_set':sobol_set,'k':1,'p':3,'M':1000,'b':'L'}
        models = hdmrlearn(trnX,trnY,params)
        features,w = hdmrselect(X,models)
    elif method == 'hdmrhaar':
        # Haar-basis variant ('H') with p=255.
        sobol_set_all = scipy.io.loadmat('sobol_set.mat')
        sobol_set = sobol_set_all['sobol_set']
        sobol_set = sobol_set.astype(float)
        params = {'sobol_set':sobol_set,'k':1,'p':255,'M':1000,'b':'H'}
        models = hdmrlearn(trnX,trnY,params)
        features,w = hdmrselect(X,models)
    else:
        # NOTE(review): falls through with `features` undefined, so the
        # return below raises NameError for unknown methods.
        print(method + 'does no exist')
    cputime = time.time() - start_time
    print features
    print 'cputime %f' % cputime
    return {'features': features, 'cputime': cputime}
def __init__(self, X, y, classifier, init_style, fratio_weight):
    """Set up a feature-selection problem over (X, y).

    Parameters
    ----------
    X, y : data matrix and label vector; shapes define no_instances/no_features.
    classifier : estimator used to evaluate candidate feature subsets.
    init_style : initialisation strategy identifier (stored verbatim).
    fratio_weight : weight of the feature-ratio term (stored as f_weight).
    """
    # Minimisation problem with one decision variable per feature.
    Problem.__init__(self, minimized=True)
    self.X = X
    self.y = y
    self.no_instances, self.no_features = self.X.shape
    # Cut-off at 0.6 — presumably used to binarise continuous positions
    # into selected/unselected features; confirm at the call sites.
    self.threshold = 0.6
    self.dim = self.no_features
    self.clf = classifier
    self.init_style = init_style
    self.f_weight = fratio_weight
    # Stratified CV is only applicable when every class has at least k
    # instances; otherwise fall back to plain KFold. Fixed seeds keep the
    # folds reproducible across runs.
    k = 10
    labels, counts = np.unique(self.y, return_counts=True)
    label_min = np.min(counts)
    if label_min < k:
        self.skf = KFold(n_splits=k, shuffle=True, random_state=1617)
        self.skf_valid = KFold(n_splits=k, shuffle=True, random_state=1990)
    else:
        self.skf = SKF(n_splits=k, shuffle=True, random_state=1617)
        self.skf_valid = SKF(n_splits=k, shuffle=True, random_state=1990)
    # ReliefF relevance scores (k=1 nearest neighbour), normalised to sum to 1.
    self.scores = reliefF(self.X, self.y, k=1)
    self.scores = self.scores / np.sum(self.scores)
    # Alternative scoring (symmetric uncertainty on MDL-discretised data),
    # kept for reference:
    # from Orange.data import Domain, Table
    # from Orange.preprocess.discretize import EntropyMDL
    # from Orange.preprocess import Discretize
    # from skfeature.utility.mutual_information import su_calculation
    # domain = Domain.from_numpy(X=X, Y=y)
    # table = Table.from_numpy(domain=domain, X=X, Y=y)
    # disc = Discretize()
    # disc.method = EntropyMDL(force=True)
    # table_dis = disc(table)
    # X_dis = table_dis.X
    # test_scores = []
    # for i in range(self.no_features):
    #     test_scores.append(su_calculation(X_dis[:, i], y))
    # test_scores = np.array(test_scores)
    # test_scores = test_scores/np.sum(test_scores)
    # self.scores = test_scores
    # SVC with a fixed seed — presumably the surrogate fitness evaluator;
    # confirm where surrogate_clf is consumed.
    self.surrogate_clf = SVC(random_state=1617)
def weight():
    """Compute three feature-weight vectors (ReliefF, Fisher, Gini) for the
    module-level `comtest` DataFrame.

    Returns
    -------
    tuple (Relief, Fisher, gini) of per-feature score arrays. The Gini
    scores are negated so that, for all three vectors, larger means more
    important.
    """
    # x_train, datamat, y_train,labelmat = cross_validation.train_test_split(comtest.iloc[0:len(comtest),1:comtest.shape[1]-1],comtest.iloc[0:len(comtest),-1], test_size = 0.2,random_state = j)
    # datamat=np.array(datamat,dtype=np.float)
    # labelmat=np.array(labelmat,dtype=np.int)
    # Extract the case data (all columns except the leading id column and the
    # trailing label column) and the labels from the module-level DataFrame.
    # BUGFIX: np.float / np.int were deprecated in NumPy 1.20 and removed in
    # 1.24; the built-in float / int dtypes are the documented replacements.
    datamat = np.array(comtest.iloc[0:len(comtest), 1:comtest.shape[1] - 1], dtype=float)
    labelmat = np.array(comtest.iloc[0:len(comtest), -1], dtype=int)
    datamat = preprocess(datamat)
    # AdaBoost can only discriminate -1/+1 labels, so remap 0 -> -1.
    for i in range(len(labelmat)):
        if labelmat[i] == 0:
            labelmat[i] = -1
    # Feature weights under ReliefF.
    Relief = reliefF.reliefF(datamat, labelmat)
    print('Relief, 第%s次验证 ' % (1))
    # Feature weights under Fisher score.
    Fisher = fisher_score.fisher_score(datamat, labelmat)
    print('Fisher, 第%s次验证 ' % (1))
    # Feature weights under the Gini index; negated because gini_index is
    # "smaller is better".
    gini = gini_index.gini_index(datamat, labelmat)
    gini = -gini
    print('gini, 第%s次验证 ' % (1))
    print("done_ %s")
    return Relief, Fisher, gini
def main():
    """10-fold cross-validated evaluation of a linear SVM on COIL20,
    selecting the top-10 features per fold with ReliefF (ranking computed on
    the training split only) and printing the mean accuracy.
    """
    # BUGFIX: sklearn.cross_validation was removed in scikit-learn 0.20; use
    # the model_selection replacement. Local import keeps the fix
    # self-contained for this snippet.
    from sklearn.model_selection import KFold
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features
    # split data into 10 folds
    ss = KFold(n_splits=10, shuffle=True)
    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM
    correct = 0
    for train, test in ss.split(X):
        # obtain the score of each feature on the training set
        score = reliefF.reliefF(X[train], y[train])
        # rank features in descending order according to score
        idx = reliefF.feature_ranking(score)
        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]
        print('num:', num_fea)
        print('selected_fs:', idx)
        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])
        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])
        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc
    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
def main():
    """10-fold CV of a linear SVM on COIL20 with per-fold ReliefF selection
    of the 100 best features; prints the mean accuracy.

    NOTE(review): Python 2 snippet (print statement) using the legacy
    sklearn.cross_validation API, which was removed in scikit-learn 0.20.
    """
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features
    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC()    # linear SVM
    correct = 0
    for train, test in ss:
        # obtain the score of each feature on the training set
        score = reliefF.reliefF(X[train], y[train])
        # rank features in descending order according to score
        idx = reliefF.feature_ranking(score)
        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]
        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])
        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])
        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc
    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10
mean_pos=np.mean(positive_feaure,axis=0)#正类中,各特征的平均值 mean_neg=np.mean(negtive_feature,axis=0)#负类中,各样本的平均值 std_pos=np.std(positive_feaure,ddof=1,axis=0)#正类中各特征值的标准差 std_neg=np.std(negtive_feature,ddof=1,axis=0)#负类中各特征值的标准差 F_up=np.square(mean_pos-mean_feature)+np.square(mean_neg-mean_feature) F_down=np.square(std_pos)+np.square(std_neg) F_score=F_up/F_down """ #------------calculate the FS score with scikit-feature package--------------# from skfeature.function.similarity_based import fisher_score from skfeature.function.information_theoretical_based import MRMR from skfeature.function.similarity_based import reliefF from skfeature.function.statistical_based import gini_index Relief = reliefF.reliefF(datamat, labelmat) Fisher= fisher_score.fisher_score(datamat, labelmat) # mRMR,J,M,=MRMR.mrmr(datamat,labelmat,n_selected_features=80) # mRMR=-mRMR gini= gini_index.gini_index(datamat,labelmat) gini=-gini FSscore=np.column_stack((Relief,Fisher,gini))#合并三个分数 FSscore=ann.preprocess(FSscore) FinalScore=np.sum(FSscore,axis=1) FS=np.column_stack((FSscore,FinalScore)) FS_nor=ann.preprocess(FS)#将最后一列联合得分归一化 FS=pd.DataFrame(FS_nor,columns=["Relief", "Fisher","gini","FinalScore"],index=featurenames) # FS.to_csv("F:\Githubcode\AdaBoost\myown\FSscore.csv")
# Benchmark continuation: for each selector, rank features on the training
# split, keep the top num_features columns, fit the shared classifier, and
# append its test accuracy to `acc`. (`score` for the Laplacian branch is
# computed above this chunk.)
idx = lap_score.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# fisher_score
score = fisher_score.fisher_score(X_train, y_train)
idx = fisher_score.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# reliefF
score = reliefF.reliefF(X_train, y_train)
idx = reliefF.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# chi_square — np.abs guards against negative entries, which chi-square rejects.
score = chi_square.chi_square(np.abs(X_train), y_train)
idx = chi_square.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# pca
def reliefF_featureSelection(x, y):
    """Score the features of (x, y) with ReliefF and return the rank vector
    produced by score_to_rank."""
    relieff_scores = reliefF.reliefF(x, y)
    return score_to_rank(relieff_scores)
# Demo: rank the COIL20 image features with ReliefF and print both the
# ranked feature indices and the raw per-feature scores.
import scipy.io
from skfeature.function.similarity_based import reliefF

mat = scipy.io.loadmat(
    '/Users/shenzixiao/Dropbox/DATA/ASU/FaceImageData/COIL20.mat')
X = mat['X']
X = X.astype(float)
y = mat['Y']
# Labels are stored as a column vector; take the first column.
y = y[:, 0]
n_samples, n_features = X.shape
# Higher ReliefF score = more discriminative feature.
score = reliefF.reliefF(X, y)
idx = reliefF.feature_ranking(score)
print(idx)
print(score)
start=time.time()
# Run the whole pipeline once per (dataset file, output workbook) pair.
# NOTE(review): this loop body continues past the end of this chunk.
for data ,outfile in zip(datasets,xlsxfile):
    #getting the data
    accuracy_list=[]
    roc_list=[]
    genes_list=[]
    time_list=[]
    mat = scipy.io.loadmat('dataset/'+data)
    X = mat['data']
    # First column holds the class label; the remaining columns are features.
    Y = X[:, 0]
    Y=Y.astype(int)
    X=X[:,1:]
    ## mRMR_sf,a,b=MRMR.mrmr(X,Y,n_selected_features=100)
    ## X=X[:,mRMR_sf[0:100]]
    # Pre-filter: keep only the 60 best features ranked by ReliefF.
    score1 = reliefF.reliefF(X, Y)
    idx = reliefF.feature_ranking(score1)
    X=X[:,idx[0:60]]
    row,col=X.shape
    # Binarise labels (one column per class) for one-vs-rest ROC computation.
    a=np.unique(Y)
    Y1 = label_binarize(Y, classes=a.tolist())
    n_classes = a.size
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    n_samples, n_features = X.shape
    feature_numbers=np.linspace(1, len(X[0]), len(X[0]))
    # Multi-objective optimiser over the Schaffer problem wrapper.
    algorithm = OMOPSO(Schaffer(),None,swarm_size = 100,leader_size = 100)
    #algorithm = NSGAII(Schaffer(),population_size = 15)
    algorithm.run(2000)
    features_list=[]
    auc_score_list=[]
#Get classes y_data = ad['Label'] y = pd.DataFrame(y_data) y=y.values.ravel() #Save the resmapling data into npy X_resampled = np.load('lymph_x.npy') y_resampled = np.load('lymph_y.npy') cv=StratifiedKFold(n_splits=14) from skfeature.function.similarity_based import reliefF for train, test in cv.split(X_resampled, y_resampled): score = reliefF.reliefF(X_resampled[train], y_resampled[train]) print(score) idx = reliefF.feature_ranking(score) X_resampled = pd.DataFrame(X_resampled) X_resampled.columns = X.columns.values X1 = X_resampled.iloc[:, [idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6], idx[7], idx[8], idx[9], idx[10], idx[11]]] #X1 = X_resampled.iloc[:, [idx[0], idx[1], idx[2], idx[3], idx[4],idx[5]]] #print(X_resampled.columns.values) X_resampled = X1
def relief_FS(X_train, y_train):
    """Run ReliefF on the training data.

    Returns a tuple (idx, score): feature indices ordered best-first and the
    raw per-feature ReliefF scores.
    """
    relief_scores = reliefF.reliefF(X_train, y_train)
    ranked_idx = reliefF.feature_ranking(relief_scores)
    return (ranked_idx, relief_scores)
# ------------------------ Fisher Score ------------------------ print "Fisher Score:" scores5 = fisher_score.fisher_score(X, y) g1 = lambda e: e[1] g10 = lambda e: e[1][0] R5, _ = zip(*sorted(enumerate(sorted(enumerate(-scores5), key=g1)), key=g10)) #print scores5 formatted_scores5 = ['%.2f' % elem for elem in scores5] print formatted_scores5 print R5 # ------------------------ Relief-F ------------------------ print "Relief-F:" scores6 = reliefF.reliefF(X, y) g1 = lambda e: e[1] g10 = lambda e: e[1][0] R6, _ = zip(*sorted(enumerate(sorted(enumerate(-scores6), key=g1)), key=g10)) #print scores6 formatted_scores6 = ['%.2f' % elem for elem in scores6] print formatted_scores6 print R6 # ------------------------ Final Ranking Calculation ------------------------ finalRanks = R1 finalRanks = np.add(finalRanks, R2) finalRanks = np.add(finalRanks, R3) finalRanks = np.add(finalRanks, R4)
def fit(self, X, y):
    """Run the feature selector named by self.name/self.tp and return the
    selected (or ranked) feature indices.

    Hyper-parameters are read from self.params.

    Returns
    -------
    idx : feature indices ordered most- to least-relevant; [] when neither
        self.name nor self.tp matches a known selector (previously this path
        raised NameError because `idx` was never initialised).
    """
    # BUGFIX: initialise idx so an unrecognised selector returns an empty
    # selection instead of crashing, consistent with the sibling fit().
    idx = []
    if self.name == 'LASSO':
        LASSO = Lasso(alpha=self.params['alpha'], positive=True)
        y_pred_lasso = LASSO.fit(X, y)
        # coef_ is 1-D for a single target, 2-D otherwise; use the first row.
        if y_pred_lasso.coef_.ndim == 1:
            coeff = y_pred_lasso.coef_
        else:
            coeff = np.asarray(y_pred_lasso.coef_[0, :])
        # Largest coefficient first.
        idx = np.argsort(-coeff)
    if self.name == 'EN':  # elastic net L1
        enet = ElasticNet(alpha=self.params['alpha'], l1_ratio=1, positive=True)
        y_pred_enet = enet.fit(X, y)
        if y_pred_enet.coef_.ndim == 1:
            coeff = y_pred_enet.coef_
        else:
            coeff = np.asarray(y_pred_enet.coef_[0, :])
        idx = np.argsort(-coeff)
    if self.name == 'RFS':
        W = RFS.rfs(X, construct_label_matrix(y), gamma=self.params['gamma'])
        idx = feature_ranking(W)
    if self.name == 'll_l21':
        # obtain the feature weight matrix
        W, _, _ = ll_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        # sort the feature scores in an ascending order according to the feature scores
        idx = feature_ranking(W)
    if self.name == 'ls_l21':
        # obtain the feature weight matrix
        W, _, _ = ls_l21.proximal_gradient_descent(
            X, construct_label_matrix(y), z=self.params['z'], verbose=False)
        # sort the feature scores in an ascending order according to the feature scores
        idx = feature_ranking(W)
    # NOTE(review): the nesting of the Relief/MI checks under the ITB branch
    # is reconstructed from the original (whitespace-mangled) source; the
    # sibling fit() places them under a separate 'filter' family — confirm.
    if self.tp == 'ITB':
        if self.name == 'MRMR':
            idx = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
        if self.name == 'Relief':
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)
        if self.name == 'MI':
            # argsort ascending, then reverse for most-informative first.
            idx = np.argsort(
                mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors']))[::-1]
    return idx
n_samples, n_features = train_data.shape # 随机采样1000个样本用于计算 X = np.array(train_data) y = np.array(train_label) X_relief, y_relief = shuffle(X, y, n_samples=10000, random_state=0) ''' Filter 方法: Distance:RelieF Dependence:Chi-squared Information:MIFS (Mutual Information Feature ''' # Relief 和 Chi 都是给出每个特征值的一个score,MIFS稍有不同,电脑是第二行也可以当作一个分数,将这三种分数都归一化为0-1之间的数值,求平均 RelieF_score = reliefF.reliefF(X_relief, y_relief[:, 0], k=n_features) # RelieF Chi = chi_square.chi_square(X, y[:, 0]) # 返回值,第一行为特征值排序后的结果,第二行为目标函数,第三行是自变量与相应变量之间的互信息 Mifs = MIFS.mifs(X_relief, y_relief[:, 0], n_selected_features=n_features) ''' 使用mean method 进行选择融合 ''' scores = pd.DataFrame({'Feature': list(Mifs[0]), 'MIFS': list(Mifs[1])}) scores = scores.sort_values(by=['Feature']) scores['Relief'] = RelieF_score scores['Chi'] = Chi # 归一化 min_max_scaler = preprocessing.MinMaxScaler() scores['MIFS_scaler'] = min_max_scaler.fit_transform(scores.loc[:, ['MIFS']]) scores['Relief_scaler'] = min_max_scaler.fit_transform(scores.loc[:, ['Relief']])
def fit(self, X, y):
    """Run the configured feature-selection method and return feature indices.

    Dispatches on self.tp (selector family) and then self.name (specific
    method): 'ITB' (information-theoretic), 'filter' (score-based),
    'wrapper' (model-based), 'SLB' (sparse-learning-based).
    Hyper-parameters are read from self.params.

    Returns
    -------
    idx : indices of the selected/ranked features; [] when nothing matches.
    """
    idx = []
    if self.tp == 'ITB':
        if self.name == 'MRMR':
            idx = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
    elif self.tp == 'filter':
        if self.name == 'Relief':
            # Per-feature ReliefF scores, then rank best-first.
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)
        if self.name == 'Fisher':
            # obtain the score of each feature on the training set
            score = fisher_score.fisher_score(X, y)
            # rank features in descending order according to score
            idx = fisher_score.feature_ranking(score)
        if self.name == 'MI':
            # argsort ascending, then reverse for most-informative first.
            idx = np.argsort(
                mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors']))[::-1]
    elif self.tp == 'wrapper':
        # Fit the wrapped estimator and keep the features it deems important.
        model_fit = self.model.fit(X, y)
        model = SelectFromModel(model_fit, prefit=True)
        idx = model.get_support(indices=True)
    elif self.tp == 'SLB':
        # one-hot-encode on target
        y = construct_label_matrix(y)
        if self.name == 'SMBA':
            scba = fs.SCBA(data=X, alpha=self.params['alpha'],
                           norm_type=self.params['norm_type'],
                           verbose=self.params['verbose'],
                           thr=self.params['thr'],
                           max_iter=self.params['max_iter'],
                           affine=self.params['affine'],
                           normalize=self.params['normalize'],
                           step=self.params['step'],
                           PCA=self.params['PCA'],
                           GPU=self.params['GPU'],
                           device=self.params['device'])
            nrmInd, sInd, repInd, _ = scba.admm()
            # Choose which index set the caller asked for.
            if self.params['type_indices'] == 'nrmInd':
                idx = nrmInd
            elif self.params['type_indices'] == 'repInd':
                idx = repInd
            else:
                idx = sInd
        if self.name == 'RFS':
            W = RFS.rfs(X, y, gamma=self.params['gamma'])
            idx = feature_ranking(W)
        if self.name == 'll_l21':
            # obtain the feature weight matrix
            W, _, _ = ll_l21.proximal_gradient_descent(X, y,
                                                       z=self.params['z'],
                                                       verbose=False)
            # sort the feature scores in an ascending order according to the feature scores
            idx = feature_ranking(W)
        if self.name == 'ls_l21':
            # obtain the feature weight matrix
            W, _, _ = ls_l21.proximal_gradient_descent(X, y,
                                                       z=self.params['z'],
                                                       verbose=False)
            # sort the feature scores in an ascending order according to the feature scores
            idx = feature_ranking(W)
        if self.name == 'LASSO':
            LASSO = Lasso(alpha=self.params['alpha'], positive=True)
            y_pred_lasso = LASSO.fit(X, y)
            # coef_ is 1-D for a single target, 2-D otherwise.
            if y_pred_lasso.coef_.ndim == 1:
                coeff = y_pred_lasso.coef_
            else:
                coeff = np.asarray(y_pred_lasso.coef_[0, :])
            # Largest coefficient first.
            idx = np.argsort(-coeff)
        if self.name == 'EN':  # elastic net L1
            enet = ElasticNet(alpha=self.params['alpha'], l1_ratio=1, positive=True)
            y_pred_enet = enet.fit(X, y)
            if y_pred_enet.coef_.ndim == 1:
                coeff = y_pred_enet.coef_
            else:
                coeff = np.asarray(y_pred_enet.coef_[0, :])
            idx = np.argsort(-coeff)
    return idx
######################### Data Preprocessing sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test) num_pip = Pipeline([ ('imputer', SimpleImputer(strategy="median")), ('std_scaler', StandardScaler()), ]) X_train = num_pip.fit_transform(X_train) X_test = num_pip.transform(X_test) print('fs') ########################### Apply Feature Selection methods :ReliefF, Laplacian score & Fisher #ReliefF score_rel = reliefF.reliefF(X_train, y_train) idx_rel = reliefF.feature_ranking(score_rel) #Laplacian score kwargs_W = { "metric": "euclidean", "neighbor_mode": "knn", "k": 7, 't': 1, 'reliefF': True } W = construct_W.construct_W(X_train, **kwargs_W) score_lap = lap_score.lap_score(X_train, W=W) idx_lap = lap_score.feature_ranking(score_lap) #Fisher score_fish = fisher_score.fisher_score(X_train, y_train) print(score_fish)
def relief_FS(X_train, y_train):
    """ReliefF feature ranking on the training split.

    Returns (idx, score): indices ordered from most to least relevant and
    the raw per-feature ReliefF scores.
    """
    score = reliefF.reliefF(X_train, y_train)
    return (reliefF.feature_ranking(score), score)