Example #1
0
                             "//encodingFile.h5",
                             mode="r")
    featureRowMatrix = fileh.root.featureRowMatrix
    sizeOfSeqList = featureRowMatrix.shape[1]
    labelDict = algFile.readLabelDictFromFile()
    labelVec = np.array(classifierFile.convertLabelDict2List(labelDict))
    numOfLabelData = len(labelVec)
    kFeatures = range(1, 60)
    accuracyScoreList = []
    accuracyScoreList2 = []
    sumarizeAccuracy2 = []

    loo = LeaveOneOut()

    #variance
    seqRowMatrixFull = algFile.readSeqRowMatFromFile()
    sel = VarianceThreshold()
    sel.fit_transform(seqRowMatrixFull)
    sortedIndexVarList = np.flipud(np.argsort(sel.variances_))
    newFeatureRowMatrix = np.transpose(seqRowMatrixFull)[sortedIndexVarList]
    newSeqRowMatrix = np.transpose(newFeatureRowMatrix)
    newSeqRowOfLabelData = newSeqRowMatrix[:numOfLabelData]

    #leave one out #variance
    for numOfFeature in kFeatures:
        seqRowOfLabelData = newSeqRowOfLabelData[:, :numOfFeature]
        tmpAccuracyScoreList2 = []
        for train_index, test_index in loo.split(seqRowOfLabelData):
            X_train, X_test = seqRowOfLabelData[
                train_index], seqRowOfLabelData[test_index]
            y_train, y_test = labelVec[train_index], labelVec[test_index]
Example #2
0
    labelVec = [None] * numOfLabelData
    for label, listIndexSeq in labelDict.iteritems():
        for index in listIndexSeq:
            labelVec[index] = label
    return labelVec


if __name__ == '__main__':
    # Script entry point: fit a k-nearest-neighbours classifier and an SVM on
    # the labelled rows of the feature-selected sequence matrix.
    labelDict = algFile.readLabelDictFromFile()
    labelVec = convertLabelDict2List(labelDict)
    # list.remove drops only the FIRST None entry and raises ValueError when
    # the list contains no None at all.
    # NOTE(review): presumably this discards a single unused placeholder slot
    # left in the list by convertLabelDict2List -- confirm against the index
    # values stored in the label dictionary.
    labelVec.remove(None)
    numOfLabelData = len(labelVec)
    seqRowMatrix = algFile.readSeqRowMatFromFile(
        '../outputfile/afterFeatureSelection.h5')
    # NOTE(review): assumes the labelled samples are the leading rows of the
    # matrix, in the same order as labelVec -- verify against the writer.
    seqRowOfLabelData = seqRowMatrix[:numOfLabelData]

    # Select cosine distance by name instead of passing the cosine_distances
    # function as a callable metric. scikit-learn calls a callable metric with
    # two 1-D vectors and expects a scalar back, whereas a pairwise-matrix
    # function expects 2-D inputs and returns a matrix. A callable metric also
    # routes the search through a ball tree, which relies on the triangle
    # inequality that cosine distance does not satisfy. algorithm='brute' is
    # spelled out because older scikit-learn releases reject 'cosine' when the
    # algorithm is left as 'auto'.
    neigh = KNeighborsClassifier(
        n_neighbors=5, metric='cosine', algorithm='brute')
    tmp2 = neigh.fit(seqRowOfLabelData, labelVec)
    # Predictions on the training rows themselves (resubstitution), kept for
    # inspection; they are not used further in this block.
    labelPredict = neigh.predict(seqRowOfLabelData)

    # SVM with scikit-learn's default settings, fitted on the same data.
    clf = svm.SVC()
    clf.fit(seqRowOfLabelData, labelVec)

#    seqRowMatrix = algFile.readSeqRowMatFromFile()
#    clf = svm.SVC()
#    clf.fit(seqRowMatrix[:numOfLabelData], labelVec)

#    labelReal = labelVec[77]