import EXTRAS
import AdaBoost
import numpy

# Earlier experiments on the simple data set, kept for reference:
# EXTRAS.plotSimpleData()
# dataMat, labels = AdaBoost.loadSimpleData()
# D = numpy.mat(numpy.ones((5, 1)) / 5)
# bestStump, minError, bestClasEst = AdaBoost.buildStump(dataMat, labels, D)

# Absolute path of the training file that is handed to AdaBoost.loadDataSet.
TRAINING_FILE = "E:/TestDatas/MachineLearningInAction/Ch07/horseColicTraining2.txt"

# Load the data, train with 10 as the third argument, then plot the ROC
# curve from the transposed second element of the training result.
dataMat, labels = AdaBoost.loadDataSet(TRAINING_FILE)
trainingResult = AdaBoost.adaBoostTrainDS(dataMat, labels, 10)
classifierArr, aggClassExt = trainingResult
AdaBoost.plotROC(aggClassExt.T, labels)
from numpy import *
import AdaBoost

# Earlier single-stump experiment, kept for reference:
# D=mat(ones((5,1))/5)
# print(AdaBoost.buildingStump(datMat,classLabels,D))

# Train on the data set returned by AdaBoost.loadSimpData().
datMat, classLabels = AdaBoost.loadSimpData()
classEstArr = AdaBoost.adaBoostTrainDS(datMat, classLabels, 10)

# Classify two query points with the training result and print the output.
queryPoints = [[5, 5], [0, 0]]
queryResult = AdaBoost.adaClassify(queryPoints, classEstArr)
print(queryResult)
def loadDataSet(fileName):
    """Parse a tab-delimited file of floats into feature rows and labels.

    The number of fields is taken from the first line of the file. For every
    line, the first ``numFeat - 1`` fields form one feature row and the last
    field is the label.

    Args:
        fileName: Path of the tab-delimited text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature rows (lists of
        floats) and a list of float labels, both in file order. Both lists
        are empty when the file is empty.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a line has fewer than ``numFeat - 1`` fields.
    """
    # Read the file once inside ``with`` so the handle is always closed
    # (previously it was opened twice and never closed explicitly).
    with open(fileName) as fr:
        lines = fr.readlines()

    dataMat = []
    labelMat = []
    if not lines:
        return dataMat, labelMat

    numFeat = len(lines[0].split('\t'))  # number of fields on the first line
    for line in lines:
        curLine = line.strip().split('\t')
        # Index explicitly (rather than slice) so a short line still raises.
        dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


# NOTE(review): training below uses 'horseColicTest2.txt', the same file the
# disabled evaluation snippet further down loads as its test data -- confirm
# whether a separate training file was intended.
datArr, labelArr = loadDataSet('horseColicTest2.txt')
weakClassArr, aggClassEst = AdaBoost.adaBoostTrainDS(datArr, labelArr, 40)

# Disabled evaluation snippet, kept for reference:
# testDataArr,testLabelArr= loadDataSet('horseColicTest2.txt')
# prediction10=AdaBoost.adaClassify(testDataArr,weakClassArr)
# errArr = mat(ones((67,1)))
# # count the errors (error rate = number of errors / 67)
# errArr[prediction10!=mat(testLabelArr).T].sum()

# Notes on imbalanced classification:
# It is commonly assumed that every class has the same classification cost,
# but in most cases the costs of the different classes are not equal.
# 1. Adjust the classifier's threshold.
# A different way to evaluate classifiers: the ROC curve and AUC.
# Metrics for measuring classifier performance: building a classifier that
# maximizes precision and recall at the same time is challenging.
# -*- coding: utf-8 -*-
"""
AdaBoost: simple data set

@author: Jerry
"""

import numpy as np
import AdaBoost


def loadDataSet():
    """Build the hard-coded five-point example data set.

    Returns:
        A ``(dataMat, classLabels)`` pair: an ``np.matrix`` built from five
        two-value rows, and a list of five labels that are ``1.0`` or
        ``-1.0``.
    """
    points = (
        [1., 2.1],
        [2., 1.1],
        [1.3, 1.],
        [1., 1.],
        [2., 1.],
    )
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return np.matrix(points), labels


if __name__ == '__main__':
    dataMat, classLabels = loadDataSet()
    # AdaBoost.adaBoostTrainDS(dataMat,classLabels, 9)
    classifierArray = AdaBoost.adaBoostTrainDS(dataMat, classLabels, 30)
    # Classify the single point [0, 0] and print what adaClassify returns.
    queryPoint = [0, 0]
    predictedLabel = AdaBoost.adaClassify(queryPoint, classifierArray)
    print(predictedLabel)
# -*- coding: utf-8 -*-
from numpy import *
import AdaBoost as ab
import LAB as lab

if __name__ == '__main__':
    mylist = []
    mytrain = []
    # Five images per class, loaded in this order: the B0_* files (tennis
    # balls) are labelled 1. and the B2_* files (apples) are labelled -1.
    for pattern, label in (('B0_%d.jpg', 1.), ('B2_%d.jpg', -1.)):
        for index in range(5):
            arr = lab.load(pattern % index)
            arr2 = lab.LAB(arr).flatten()  # flatten after conversion
            mylist.append(arr2.tolist())
            mytrain.append(label)

    dataArr = matrix(mylist)
    classLabels = matrix(mytrain)
    D = mat(ones((10, 1)) / 10)  # not passed to the training call below
    classifierArray = ab.adaBoostTrainDS(dataArr, classLabels, 10)
    # Print the alpha/dim/thresh/ineq entries of every returned item.
    for entry in classifierArray:
        print('alpha:', entry['alpha'],
              'dim:', entry['dim'],
              'thresh:', entry['thresh'],
              'ineq:', entry['ineq'])
ax = plt.subplot(111) #画图 for index in sortedIndicies.tolist()[0]: if classLabels[index] == 1.0: delX = 0; delY = yStep; else: delX = xStep; delY = 0; ySum += cur[1] ax.plot([cur[0],cur[0]-delX],[cur[1],cur[1]-delY], c='b') cur = (cur[0]-delX,cur[1]-delY) ax.plot([0,1],[0,1],'b--') plt.xlabel('False positive rate') plt.ylabel('True positive rate') plt.title('ROC curve for AdaBoost horse colic detection system') ax.axis([0,1,0,1]) plt.show() print("the Area Under the Curve is: ",ySum*xStep) if __name__ == '__main__': trainingMat,trainingLabels = AdaBoost.loadDataSet('horseColicTraining2.txt') classifierArray = AdaBoost.adaBoostTrainDS(trainingMat,trainingLabels, 10) testMat,testLabels = AdaBoost.loadDataSet('horseColicTest2.txt') prediction10 = AdaBoost.adaClassify(testMat, classifierArray) print(prediction10) plotROC(prediction10, testLabels)
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 10 20:38:02 2018

@author: tf
"""

import AdaBoost
import numpy as np

# Earlier experiments, kept for reference:
#dataMat, labelMat = AdaBoost.loadDataSet()
#print(dataMat, '\n', labelMat)
#D = np.ones((5, 1)) / 5
#bestStump, minErr, bestClassEst = AdaBoost.buildStump(dataMat, labelMat, D)
#print(bestStump, '\n', minErr, '\n', bestClassEst)
#classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)
#print(max(0.1,0.2))
#clas = AdaBoost.adaClassify(np.array([[5, 5], [0, 0]]), classifierArr)
#print(clas)

# File names passed to AdaBoost.loadFileDataSet for training and testing.
TRAIN_FILE = 'horseColicTraining2.txt'
TEST_FILE = 'horseColicTest2.txt'

# Train on the training file.
dataMat, labelMat = AdaBoost.loadFileDataSet(TRAIN_FILE)
classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)

# Run adaClassify on the test data together with its labels and print the
# value it returns.
testDataMat, testLabelMat = AdaBoost.loadFileDataSet(TEST_FILE)
errRate = AdaBoost.adaClassify(testDataMat, classifierArr, testLabelMat)
print(errRate)