def test_SSelect():
    """Run SSelect feature selection on one train/test split and print SVM accuracy.

    Features are scored with ``SSelect.SSelect`` (given the training data,
    training labels and test data), ranked with ``SSelect.feature_ranking``,
    and the first 100 entries of the ranking are used to fit a
    ``svm.LinearSVC`` on the training set. The accuracy of its predictions
    on the test set is printed.
    """
    splits = get_traintest()
    # The generator is advanced twice, so the second yielded split is the
    # one actually evaluated.
    # NOTE(review): confirm skipping the first split is intentional.
    for _ in range(2):
        x_train, y_train, x_test, y_test, path = next(splits)

    ranking = SSelect.feature_ranking(SSelect.SSelect(x_train, y_train, x_test))

    top_k = 100  # number of selected features
    chosen = ranking[:top_k]

    model = svm.LinearSVC()
    model.fit(x_train[:, chosen], y_train)
    predicted = model.predict(x_test[:, chosen])

    print('Accuracy : {0}'.format(accuracy_score(y_test, predicted)))
def test_lsdf():
    """Run lsdf feature selection on the first train/test split and print mean SVM accuracy.

    Features are scored with ``lsdf.lsdf`` (given the training data, training
    labels and test data) and ranked with ``lsdf.feature_ranking``. The first
    100 entries of the ranking are kept. A fresh ``svm.LinearSVC`` is fitted
    and evaluated 10 times on the same split, and the mean accuracy over
    those runs is printed.
    """
    splits = get_traintest()
    x_train, y_train, x_test, y_test, path = next(splits)

    ranking = lsdf.feature_ranking(lsdf.lsdf(x_train, y_train, x_test))

    top_k = 100  # number of selected features
    chosen = ranking[:top_k]
    repeats = 10

    def _single_run_accuracy():
        # Fit a new classifier on the selected columns and score it on the test set.
        model = svm.LinearSVC()
        model.fit(x_train[:, chosen], y_train)
        return accuracy_score(y_test, model.predict(x_test[:, chosen]))

    total = sum(_single_run_accuracy() for _ in range(repeats))
    print('Accuracy : {0}'.format(total / repeats))
def cal_baseline():
    """Compute the all-features baseline accuracy for every dataset and save it.

    For each ``(X, y, path)`` yielded by ``get_data()``, a ``svm.LinearSVC``
    is evaluated with 3-fold stratified cross-validation using *all*
    features (no feature selection). The mean fold accuracy is written to
    ``./result/<suffix>/baseline_accuracy_folders_3.txt``, where ``<suffix>``
    is the part of ``path`` after its last ``'data'`` occurrence.
    """
    num_folders = 3
    output_path = './result/'
    fn = 'baseline_accuracy_folders_{0}.txt'.format(num_folders)

    for X, y, path in get_data():
        folds = cross_validation.StratifiedKFold(y, n_folds=num_folders, shuffle=True)

        clf = svm.LinearSVC()  # linear SVM
        correct = 0
        for train, test in folds:
            # Baseline: train on every feature. The per-fold Fisher-score
            # ranking that used to be computed here was never used, so it
            # has been dropped.
            clf.fit(X[train], y[train])

            # accumulate the classification accuracy on the held-out fold
            y_predict = clf.predict(X[test])
            correct += accuracy_score(y[test], y_predict)

        # average classification accuracy over all folds
        avg_accuracy = correct * 1.0 / num_folders

        # mirror the dataset's location (text after 'data') under the output directory
        new_path = output_path + path.split('data')[-1].strip()
        create_path(new_path)
        with open(new_path + '/' + fn, 'w+') as f:
            print(avg_accuracy, file=f)

    # NOTE(review): the original indentation was lost; this message is assumed
    # to be printed once, after all datasets are processed — confirm.
    print('{0} finish!'.format(__file__))
def main():
    """Evaluate Fisher-score feature selection on the first dataset with 3-fold CV.

    Takes the first ``(X, y, path)`` yielded by ``get_data()``. For each
    stratified fold, features are scored on the training rows with
    ``fisher_score.fisher_score`` and ranked with
    ``fisher_score.feature_ranking``. The first 100 ranked features are used
    to fit a ``svm.LinearSVC`` on the training rows and score it on the test
    rows. The mean accuracy over the folds is printed.
    """
    datasets = get_data()
    X, y, path = next(datasets)
    n_samples, n_features = X.shape  # number of samples and number of features

    # stratified split into folds
    num_folders = 3
    folds = cross_validation.StratifiedKFold(y, n_folds=num_folders, shuffle=True)

    num_fea = 100  # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    fold_accuracies = []
    for train, test in folds:
        # score and rank the features using the training rows only
        ranking = fisher_score.feature_ranking(
            fisher_score.fisher_score(X[train], y[train])
        )

        # restrict the whole dataset to the top-ranked columns
        reduced = X[:, ranking[0:num_fea]]

        # fit on the training rows, evaluate on the held-out rows
        clf.fit(reduced[train], y[train])
        predicted = clf.predict(reduced[test])
        fold_accuracies.append(accuracy_score(y[test], predicted))

    # average classification accuracy over all folds
    print('Accuracy:', float(sum(fold_accuracies)) / num_folders)