def test():
    """Report how often the forest's majority vote matches the RF benchmark.

    Loads the training set, the test set and a benchmark label vector through
    ``LoadData``, builds a forest with ``createRandomForest``, classifies
    every test row with each tree, and takes the majority label.  Prints the
    number of rows whose voted label equals the benchmark label at the same
    position, followed by that count divided by the benchmark length.

    Note that the score is agreement with the benchmark vector, not accuracy
    against ground-truth labels.  To check against known labels instead, hold
    out a slice of the training data (e.g. the first 50 rows) and train on
    the remainder.
    """
    import LoadData

    trainMat, classLabelVector = LoadData.loadTrainDataFromCSV(TRAIN_FILE)
    testMat = LoadData.loadTestDataFromCSV(TEST_FILE)
    rfbenchmarkVector = LoadData.loadRFBenchmarkFromCSV(RF_BENCHMARK_FILE)

    # Column identifiers 1..784, one per feature column.
    featureIds = list(range(1, 785))
    # Integer square root of the feature count (28 here); presumably the
    # number of features each tree samples -- confirm in createRandomForest.
    subsetSize = int(len(featureIds) ** 0.5)
    # The leading 10 is presumably the number of trees -- confirm likewise.
    forest = createRandomForest(
        10, trainMat, classLabelVector, featureIds, subsetSize
    )

    matches = 0
    for rowIdx, sample in enumerate(testMat):
        votes = [classify(tree, featureIds, sample) for tree in forest]
        if majorityCnt(votes) == rfbenchmarkVector[rowIdx]:
            matches += 1

    print(matches)
    # float() keeps the division from truncating under Python 2.
    agreement = matches / float(len(rfbenchmarkVector))
    print(agreement)
def test():
    """Run the kNN classifier (k = 3) over every row of the test set.

    Loads the training and test data through ``LoadData``, converts the
    training matrix with ``array``, and calls ``classify_kNN`` once per test
    row against the full training set.

    NOTE(review): the predicted label is neither stored, printed nor
    returned in the code visible here, so this function currently has no
    observable output -- confirm whether result handling is missing.
    """
    import LoadData

    rawTrain, trainLabels = LoadData.loadTrainDataFromCSV(TRAIN_FILE)
    trainArr = array(rawTrain)
    testRows = LoadData.loadTestDataFromCSV(TEST_FILE)
    neighbourCount = 3

    for row in testRows:
        # Result intentionally left unused, matching the existing behaviour.
        classify_kNN(row, trainArr, trainLabels, neighbourCount)