"//encodingFile.h5", mode="r") featureRowMatrix = fileh.root.featureRowMatrix sizeOfSeqList = featureRowMatrix.shape[1] labelDict = algFile.readLabelDictFromFile() labelVec = np.array(classifierFile.convertLabelDict2List(labelDict)) numOfLabelData = len(labelVec) kFeatures = range(1, 60) accuracyScoreList = [] accuracyScoreList2 = [] sumarizeAccuracy2 = [] loo = LeaveOneOut() #variance seqRowMatrixFull = algFile.readSeqRowMatFromFile() sel = VarianceThreshold() sel.fit_transform(seqRowMatrixFull) sortedIndexVarList = np.flipud(np.argsort(sel.variances_)) newFeatureRowMatrix = np.transpose(seqRowMatrixFull)[sortedIndexVarList] newSeqRowMatrix = np.transpose(newFeatureRowMatrix) newSeqRowOfLabelData = newSeqRowMatrix[:numOfLabelData] #leave one out #variance for numOfFeature in kFeatures: seqRowOfLabelData = newSeqRowOfLabelData[:, :numOfFeature] tmpAccuracyScoreList2 = [] for train_index, test_index in loo.split(seqRowOfLabelData): X_train, X_test = seqRowOfLabelData[ train_index], seqRowOfLabelData[test_index] y_train, y_test = labelVec[train_index], labelVec[test_index]
labelVec = [None] * numOfLabelData for label, listIndexSeq in labelDict.iteritems(): for index in listIndexSeq: labelVec[index] = label return labelVec if __name__ == '__main__': labelDict = algFile.readLabelDictFromFile() # tmp = convertLabelDict2List(labelDict) # tmp[1].remove(None) # labelVec = tmp[1] labelVec = convertLabelDict2List(labelDict) labelVec.remove(None) numOfLabelData = len(labelVec) seqRowMatrix = algFile.readSeqRowMatFromFile( '../outputfile/afterFeatureSelection.h5') seqRowOfLabelData = seqRowMatrix[:numOfLabelData] neigh = KNeighborsClassifier(n_neighbors=5, metric=cosine_distances) tmp2 = neigh.fit(seqRowOfLabelData, labelVec) labelPredict = neigh.predict(seqRowOfLabelData) clf = svm.SVC() clf.fit(seqRowOfLabelData, labelVec) # seqRowMatrix = algFile.readSeqRowMatFromFile() # clf = svm.SVC() # clf.fit(seqRowMatrix[:numOfLabelData], labelVec) # labelReal = labelVec[77]