import datetime

from skfeature.function.information_theoretical_based import FCBF


def fcbf():
    # http://featureselection.asu.edu/html/skfeature.function.information_theoretical_based.FCBF.html
    # Relies on module-level data, labels, header and transform_and_save().
    before = datetime.datetime.now()
    result = FCBF.fcbf(data, labels, mode="index", delta=0)  # delta is the threshold
    after = datetime.datetime.now()
    print("FCBF")
    print(len(result))
    print("time: " + str(after - before))
    print('\n')
    if len(result) < len(header):
        transform_and_save(result, "FCBF")
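# transform_and_save() is not defined in this snippet. A minimal sketch of what
# such a helper might look like (an assumption, not the original implementation),
# assuming module-level data, labels and header hold the feature matrix, the
# class labels and the column names; the output filename is made up here.
import pandas as pd

def transform_and_save(selected_idx, method_name):
    # Hypothetical helper: keep only the columns picked by the selector,
    # re-attach the labels, and persist the reduced dataset as CSV.
    selected_cols = [header[i] for i in selected_idx]
    reduced = pd.DataFrame(data, columns=header)[selected_cols]
    reduced["label"] = labels
    reduced.to_csv(method_name + "_reduced.csv", index=False)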
from skfeature.function.information_theoretical_based import FCBF
from tabulate import tabulate


def execute(data, cols):
    # Rank features with FCBF and return a plain-text table of (name, SU score).
    y = data['GroundTruth'].values
    x_orig = data.drop(['GroundTruth'], axis=1)
    x = x_orig.values
    (idx, uncertainty_idx) = FCBF.fcbf(x, y, n_selected_features=len(cols))
    headers = ["Name", "Score"]
    values = sorted(zip(x_orig.columns[idx], uncertainty_idx), key=lambda xi: xi[1] * -1)
    return tabulate(values, headers, tablefmt="plain")
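# Usage sketch (an assumption, not part of the original source): build a small
# discrete DataFrame with a 'GroundTruth' column and print the FCBF ranking
# table produced by execute() above. Assumes execute() and its FCBF/tabulate
# imports are in scope; FCBF expects discrete feature values.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randint(0, 3, size=(100, 4)),
                  columns=["f1", "f2", "f3", "f4"])
df["GroundTruth"] = (df["f1"] + df["f2"] > 2).astype(int)
print(execute(df, ["f1", "f2", "f3", "f4"]))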
from skfeature.function.information_theoretical_based import FCBF
from sklearn import model_selection, svm
from sklearn.metrics import accuracy_score


def _execute(data, cols):
    y = data['GroundTruth'].values
    x = data.drop(['GroundTruth'], axis=1).values

    # split data into 10 folds
    # ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    ss = model_selection.KFold(n_splits=10, random_state=None, shuffle=True)
    # ss = cross_validate(svc, x, y, cv=10, scoring='accuracy')

    # perform evaluation on classification task
    num_fea = len(cols)      # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss.split(x, y):
        # obtain the index of each feature on the training set
        x_train = x[train]
        y_train = y[train]
        (idx, uncertainty_idx) = FCBF.fcbf(x_train, y_train, n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features_idx = idx[0:num_fea]
        features = x[:, features_idx]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc
        print(idx, train)
        print(features_idx)
        print(acc)

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
import scipy.io
from skfeature.function.information_theoretical_based import FCBF
from sklearn import model_selection, svm
from sklearn.metrics import accuracy_score


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = model_selection.KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10             # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss.split(X, y):
        # obtain the index of each feature on the training set
        # fcbf returns (selected indices, their SU scores)
        idx, _ = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
def FBCF_featureSelection(x, y):
    idx = FCBF.fcbf(x, y)
    rank = feature_ranking(idx)
    return rank
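# Sketch of an alternative (an assumption, not from the original source): in the
# tuple-returning skfeature API used elsewhere in this file, FCBF.fcbf already
# yields the selected feature indices ordered by symmetric uncertainty together
# with their scores, so a variant of the helper above can skip the separate
# feature_ranking step.
from skfeature.function.information_theoretical_based import FCBF

def fcbf_rank(x, y):
    # fcbf returns (selected indices, their SU scores); the indices are
    # already ordered from most to least relevant.
    idx, su = FCBF.fcbf(x, y)
    return idx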
import numpy as np

from sklearn.feature_selection import RFE, SelectKBest
from sklearn.svm import SVR

# train_X, train_Y and feats (the feature names) come from earlier in the original script.
bestFeat = SelectKBest()
bestFeat.fit(train_X, train_Y)
feat_scr = zip(feats, bestFeat.scores_)
feat_scr = [f for f in feat_scr if not np.isnan(f[1])]
sorted_fetas = sorted(feat_scr, key=lambda k: k[1], reverse=True)

# estimator = SVR(kernel="linear")
# selector = RFE(estimator, 5, step=1)
# selector.fit(train_X, train_Y)  # slow

from sklearn.ensemble import GradientBoostingClassifier

g_cls = GradientBoostingClassifier(n_estimators=10)
g_cls.fit(train_X, train_Y)
g_feats = g_cls.feature_importances_
g_feat_scr = zip(feats, g_feats)
g_feat_scr = [f for f in g_feat_scr if not np.isnan(f[1])]
g_sorted_fetas = sorted(g_feat_scr, key=lambda k: k[1], reverse=True)

from skfeature.function.information_theoretical_based import FCBF, LCSI, MRMR, JMI

score = FCBF.fcbf(train_X, train_Y)
fcbf_sorted = [feats[i] for i in score]
score = MRMR.mrmr(train_X, train_Y, n_selected_features=50)
MRMR_sorted = [feats[i] for i in score]
score = JMI.jmi(train_X, train_Y, n_selected_features=50)
JMI_sorted = [feats[i] for i in score]
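# Follow-up sketch (an assumption, not part of the original script): compare how
# much the rankings above agree by counting shared names in each pair's top 20.
# Assumes sorted_fetas, g_sorted_fetas, fcbf_sorted, MRMR_sorted and JMI_sorted
# from the block above are in scope.
top_n = 20
rankings = {
    "SelectKBest": [name for name, _ in sorted_fetas[:top_n]],
    "GradientBoosting": [name for name, _ in g_sorted_fetas[:top_n]],
    "FCBF": fcbf_sorted[:top_n],
    "MRMR": MRMR_sorted[:top_n],
    "JMI": JMI_sorted[:top_n],
}
for a in rankings:
    for b in rankings:
        if a < b:
            shared = len(set(rankings[a]) & set(rankings[b]))
            print(a, "vs", b, ":", shared, "/", top_n, "shared features")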