#!/usr/bin/python2.7
# _*_ coding: utf-8 _*_

"""
Demo script for the AdaBoost decision-stump classifier.

Trains on the data returned by ``adaboost.loadSimpData()``, prints the
trained stumps, then classifies two hand-written points and prints the sign
of each weighted forecast together with the returned confidence values.

@Author: MarkLiu
"""
import adaboost
import numpy as np

# Train the algorithm.
# NOTE(review): the third argument (20) is presumably the maximum number of
# boosting rounds -- confirm against adaboost.adaboostTrainDecisionStump.
dataMatrix, classLabels = adaboost.loadSimpData()
bestDecisionStumps = adaboost.adaboostTrainDecisionStump(
    dataMatrix, classLabels, 20)
# A single parenthesised argument prints exactly what the Python 2 print
# statement would print.
print(bestDecisionStumps)

print("-------测试算法-------")
testDatas = [[0, 0], [5, 0]]
weightedForecastClasses, confidence = adaboost.adaboostClassify(
    testDatas, bestDecisionStumps)

print("预测的结果及对应的分类把握:")
# The sign of the weighted forecast is the predicted class label.
print(np.sign(weightedForecastClasses).T)
print(confidence.T)
datasArr.append(floatdatas[:-1]) labelsArr.append(int(floatdatas[-1])) frTrain.close() return datasArr, labelsArr if __name__ == '__main__': trainDatas, trainLabels = loadDataSet('dataset/trainingDatas.txt') print np.shape(trainDatas) print trainDatas print np.shape(trainLabels) print trainLabels # 训练算法获得多个简单决策树分类器 bestDecisionStumps = adaboost.adaboostTrainDecisionStump(trainDatas, trainLabels, 40) testDatas, testLabels = loadDataSet('dataset/testDatas.txt') # 返回预测结果 weightedForecastClasses, confidence = \ adaboost.adaboostClassify(testDatas, bestDecisionStumps) # 统计分类错误 errorArr = np.matrix(np.zeros((len(testLabels), 1))) errorArr[np.sign(weightedForecastClasses) != np.matrix(testLabels).T] = 1 print "errorArr:" print errorArr.T print "分类的结果:" print "预测的类别:", np.sign(weightedForecastClasses).T print "测试数据共 %d 个" % len(testLabels) print "错误分类共 %d 个" % errorArr.sum() print "分类的错误率为:", 1.0 * errorArr.sum() / len(testLabels)