Example #1
def fcbf():
    # http://featureselection.asu.edu/html/skfeature.function.information_theoretical_based.FCBF.html
    before = datetime.datetime.now()
    result = FCBF.fcbf(data, labels, mode="index", delta=0)  # the threshold is delta
    after = datetime.datetime.now()
    print("FCBF")
    print(len(result))
    print("time: " + str(after - before))
    print('\n')
    if len(result) < len(header):
        transform_and_save(result, "FCBF")
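
The function above reads module-level data, labels, and header and calls an external transform_and_save, none of which are shown. A minimal sketch of how those globals might be prepared, assuming a purely hypothetical CSV file whose last column is the class label:

import pandas as pd

# Hypothetical setup for the globals used by fcbf() above; the file name
# and column layout are illustrative assumptions, not part of the example.
df = pd.read_csv("dataset.csv")
header = list(df.columns[:-1])          # feature names
data = df[header].values.astype(float)  # feature matrix
labels = df.iloc[:, -1].values          # class labels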
Example #2
    def execute(data, cols):
        y = data['GroundTruth'].values
        x_orig = data.drop(['GroundTruth'], axis=1)
        x = x_orig.values

        (idx, uncertainty_idx) = FCBF.fcbf(x, y, n_selected_features=len(cols))

        headers = ["Name", "Score"]
        values = sorted(zip(x_orig.columns[idx], uncertainty_idx),
                        key=lambda xi: xi[1] * -1)

        return tabulate(values, headers, tablefmt="plain")
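
A hedged usage sketch for execute() above, treating it as a plain function; the DataFrame, column names, and label rule are illustrative assumptions:

import numpy as np
import pandas as pd

# Toy frame with three discrete feature columns plus the GroundTruth
# column that execute() expects; values are illustrative only.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "f1": rng.integers(0, 3, size=60),
    "f2": rng.integers(0, 3, size=60),
    "f3": rng.integers(0, 2, size=60),
})
df["GroundTruth"] = (df["f1"] > 1).astype(int)

# cols is only used for n_selected_features, so feature names suffice here.
print(execute(df, ["f1", "f2", "f3"]))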
Example #3
    def _execute(data, cols):
        y = data['GroundTruth'].values
        x = data.drop(['GroundTruth'], axis=1).values

        # split data into 10 folds
        # ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
        ss = model_selection.KFold(n_splits=10,
                                   random_state=None,
                                   shuffle=True)

        # ss = cross_validate(svc, x, y, cv=10, scoring='accuracy')

        # perform evaluation on classification task
        num_fea = len(cols)  # number of selected features
        clf = svm.LinearSVC()  # linear SVM

        correct = 0
        for train, test in ss.split(x, y):
            # obtain the index of each feature on the training set
            x_train = x[train]
            y_train = y[train]
            (idx, uncertainty_idx) = FCBF.fcbf(x_train,
                                               y_train,
                                               n_selected_features=num_fea)

            # obtain the dataset on the selected features
            features_idx = idx[0:num_fea]
            features = x[:, features_idx]

            # train a classification model with the selected features on the training dataset
            clf.fit(features[train], y[train])

            # predict the class labels of test data
            y_predict = clf.predict(features[test])

            # obtain the classification accuracy on the test data
            acc = accuracy_score(y[test], y_predict)
            correct = correct + acc

            print(idx, train)
            print(features_idx)
            print(acc)

        # output the average classification accuracy over all 10 folds
        print('Accuracy:', float(correct) / 10)
Example #4
def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = model_selection.KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the index of each feature on the training set
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
Example #6
def FBCF_featureSelection(x, y):
    idx = FCBF.fcbf(x, y)
    rank = feature_ranking(idx)
    return rank
import numpy as np
from sklearn.feature_selection import RFE, SelectKBest
from sklearn.svm import SVR

bestFeat = SelectKBest()
bestFeat.fit(train_X, train_Y)
feat_scr = zip(feats, bestFeat.scores_)
feat_scr = [f for f in feat_scr if not np.isnan(f[1])]
sorted_fetas = sorted(feat_scr, key=lambda k: k[1], reverse=True)

# estimator = SVR(kernel="linear")
# selector = RFE(estimator, 5, step=1)
# selector.fit(train_X, train_Y)  # slow

from sklearn.ensemble import GradientBoostingClassifier
g_cls = GradientBoostingClassifier(n_estimators=10)
g_cls.fit(train_X, train_Y)
g_feats = g_cls.feature_importances_
g_feat_scr = zip(feats, g_feats)
g_feat_scr = [f for f in g_feat_scr if not np.isnan(f[1])]
g_sorted_fetas = sorted(g_feat_scr, key=lambda k: k[1], reverse=True)

from skfeature.function.information_theoretical_based import FCBF, LCSI, MRMR, JMI
score = FCBF.fcbf(train_X, train_Y)
fcbf_sorted = [feats[i] for i in score]

score = MRMR.mrmr(train_X, train_Y, n_selected_features=50)
MRMR_sorted = [feats[i] for i in score]

score = JMI.jmi(train_X, train_Y, n_selected_features=50)
JMI_sorted = [feats[i] for i in score]
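
A quick, purely illustrative way to put the five rankings computed above side by side (the cut-off top is arbitrary):

top = 10
print("SelectKBest:", [f for f, _ in sorted_fetas[:top]])
print("GradBoost  :", [f for f, _ in g_sorted_fetas[:top]])
print("FCBF       :", fcbf_sorted[:top])
print("mRMR       :", MRMR_sorted[:top])
print("JMI        :", JMI_sorted[:top])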