def cross_validation(file):
    """Run a five-fold pass over ``file`` and report recurring selections.

    The rows of ``file`` are split into five contiguous folds. For each
    fold, the fold's rows are used as test data and all remaining rows as
    training data, and ``SVM.SVM_main(traindata, testdata)`` is called. The
    items it returns (presumably selected feature indices -- confirm against
    ``SVM.SVM_main``) are accumulated across folds.

    Items returned more than once overall are collected with their
    occurrence counts, the floor of the mean count is used as a threshold,
    and ``find_key(fs_dict, threshold)`` produces the final list (its exact
    selection rule is defined elsewhere). All results are printed; nothing
    is returned.

    Args:
        file: Row-indexable 2-D data exposing ``.shape`` and supporting
            ``file[row_index_list, :]`` (assumed to be a NumPy array --
            TODO confirm against the caller).
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    n_folds = 5
    # Size the folds from the argument itself rather than a module-level
    # array, so the fold boundaries always match the rows being indexed.
    total_rows = file.shape[0]
    fold_size = total_rows // n_folds

    # Accumulate the selections reported by every fold.
    fs_list = []
    for fold in range(n_folds):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(str(fold + 1) + "-fold")

        start = fold * fold_size
        # The last fold absorbs the remainder rows.
        if fold == n_folds - 1:
            stop = total_rows
        else:
            stop = start + fold_size

        test_rows = list(range(start, stop))
        # Every row outside [start, stop), in ascending order.
        train_rows = list(range(0, start)) + list(range(stop, total_rows))

        testdata = file[test_rows, :]
        traindata = file[train_rows, :]

        # NOTE: only the returned selection list is used here; no accuracy
        # is aggregated across folds.
        fs_list.extend(SVM.SVM_main(traindata, testdata))

    # Count occurrences once (O(n)) instead of calling list.count per item.
    counts = Counter(fs_list)

    # Keep only items that occurred more than once; iterate fs_list so keys
    # are inserted in first-occurrence order.
    fs_dict = {}
    for feature in fs_list:
        if counts[feature] > 1:
            fs_dict[feature] = counts[feature]
    print('feature frequency:' + str(fs_dict))

    if not fs_dict:
        # Nothing recurred, so there is no mean frequency to compute
        # (previously this path raised ZeroDivisionError).
        print('feature result:' + str([]))
        return

    # Threshold = mean occurrence count. Floor division is deliberate: the
    # counts are ints and the threshold passed to find_key stays an int.
    f = sum(fs_dict.values()) // len(fs_dict)
    print('feature frequency avg:' + str(f))

    fs_list = find_key(fs_dict, f)
    print('feature result:' + str(fs_list))