def test_SSelect():
    tt = get_traintest()
    next(tt)  # advance past the first yielded split; only the second one is used here
    x_train, y_train, x_test, y_test, path = next(tt)
    SSelect_score = SSelect.SSelect(x_train, y_train, x_test)
    SSelect_score_rank = SSelect.feature_ranking(SSelect_score)

    num_fea = 100  # number of selected features
    selected_fea = SSelect_score_rank[:num_fea]

    clf = svm.LinearSVC()
    clf.fit(x_train[:, selected_fea], y_train)
    y_predict = clf.predict(x_test[:, selected_fea])
    accuracy = accuracy_score(y_test, y_predict)
    print('Accuracy : {0}'.format(accuracy))
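

# The tests above depend on a get_traintest() helper that is not shown in this
# file. Judging from the unpacking, it yields (x_train, y_train, x_test, y_test,
# path) tuples. Below is a minimal stand-in sketch (synthetic data, hypothetical
# name), useful only for exercising the tests in isolation.
import numpy as np
from sklearn.model_selection import train_test_split


def get_traintest_stub(n_samples=200, n_features=500, n_classes=2, seed=0):
    # hypothetical stand-in for get_traintest(): yields one synthetic split
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, n_features)
    y = rng.randint(n_classes, size=n_samples)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=seed)
    yield x_train, y_train, x_test, y_test, './data/synthetic'

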
def test_lsdf():
    tt = get_traintest()
    x_train, y_train, x_test, y_test, path = next(tt)
    lsdf_score = lsdf.lsdf(x_train, y_train, x_test)
    lsdf_score_rank = lsdf.feature_ranking(lsdf_score)

    num_fea = 100  # number of selected features
    idx = lsdf_score_rank[:num_fea]

    accuracy = 0
    run_num = 10
    for i in range(run_num):
        clf = svm.LinearSVC()
        clf.fit(x_train[:, idx], y_train)
        y_predict = clf.predict(x_test[:, idx])
        accuracy += accuracy_score(y_test, y_predict)
    print('Accuracy : {0}'.format(accuracy/run_num))
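

# Note: the feature_ranking() helpers used here (SSelect.feature_ranking,
# lsdf.feature_ranking, fisher_score.feature_ranking) typically reduce to an
# argsort of the scores, assuming larger scores indicate more relevant
# features. A minimal sketch of that convention (hypothetical name):
import numpy as np


def feature_ranking_sketch(score):
    # indices of the features ordered from highest score to lowest
    return np.argsort(score)[::-1]

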
def cal_baseline():
    num_folders = 3
    output_path = './result/'
    fn = 'baseline_accuracy_folders_{0}.txt'.format(num_folders)

    # load data
    gd = get_data()
    for X, y, path in gd:

        ss = cross_validation.StratifiedKFold(y,
                                              n_folds=num_folders,
                                              shuffle=True)

        # perform evaluation on classification task
        clf = svm.LinearSVC()  # linear SVM

        correct = 0
        for train, test in ss:
            # compute the Fisher score of each feature on the training folds
            # (the ranking below is not applied in this baseline)
            score = fisher_score.fisher_score(X[train], y[train])

            # rank features in descending order according to score
            idx = fisher_score.feature_ranking(score)

            # train the baseline classifier on all features of the training folds
            clf.fit(X[train], y[train])

            # predict the class labels of the test data
            y_predict = clf.predict(X[test])

            # obtain the classification accuracy on the test data
            acc = accuracy_score(y[test], y_predict)
            correct += acc

        # output the average classification accuracy over all k folds
        avg_accuracy = correct * 1.0 / num_folders

        new_path = output_path + path.split('data')[-1].strip()
        create_path(new_path)
        with open(new_path + '/' + fn, 'w+') as f:
            print(avg_accuracy, file=f)

    print('{0} finish!'.format(__file__))
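

# cal_baseline() (and main() below) rely on the old sklearn.cross_validation
# module, which was deprecated in scikit-learn 0.18 and removed in 0.20. A
# sketch of the same fold loop with the current model_selection API
# (hypothetical function name):
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold


def baseline_cv_accuracy(X, y, num_folders=3):
    skf = StratifiedKFold(n_splits=num_folders, shuffle=True)
    clf = svm.LinearSVC()
    correct = 0.0
    for train, test in skf.split(X, y):
        # baseline: train and evaluate on all features, no selection
        clf.fit(X[train], y[train])
        correct += accuracy_score(y[test], clf.predict(X[test]))
    return correct / num_folders

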
def main():
    # load data
    gd = get_data()
    X, y, path = next(gd)

    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into several folds
    num_folders = 3
    ss = cross_validation.StratifiedKFold(y, n_folds=num_folders, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the score of each feature on the training set
        score = fisher_score.fisher_score(X[train], y[train])

        # rank features in descending order according to score
        idx = fisher_score.feature_ranking(score)

        # restrict the data matrix to the top num_fea ranked features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of the test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct += acc

    # output the average classification accuracy over all folds
    print('Accuracy:', float(correct)/num_folders)
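

# The per-fold selection in main() can also be expressed as a scikit-learn
# Pipeline: SelectKBest accepts any score function that returns one score per
# feature, and fisher_score.fisher_score follows that (X, y) -> scores
# convention. A sketch, assuming the usual skfeature package layout; the
# function name is hypothetical:
from sklearn import svm
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from skfeature.function.similarity_based import fisher_score


def fisher_cv_accuracy(X, y, num_fea=100, num_folders=3):
    # the selector is re-fit inside every fold, so no test information leaks
    pipe = Pipeline([
        ('select', SelectKBest(score_func=fisher_score.fisher_score, k=num_fea)),
        ('clf', svm.LinearSVC()),
    ])
    cv = StratifiedKFold(n_splits=num_folders, shuffle=True)
    return cross_val_score(pipe, X, y, cv=cv).mean()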