コード例 #1
0
ファイル: __init__.py プロジェクト: miemieyanglove/MLStudy
import EXTRAS
import AdaBoost
import numpy

# EXTRAS.plotSimpleData()

# dataMat, labels = AdaBoost.loadSimpleData()

# D = numpy.mat(numpy.ones((5, 1)) / 5)

# bestStump, minError, bestClasEst = AdaBoost.buildStump(dataMat, labels, D)

# Driver: load the horse-colic training file, train with AdaBoost, then plot
# an ROC curve from the training output.
# NOTE(review): absolute, machine-specific Windows path -- this only runs on a
# machine where exactly this file exists.
dataMat, labels = AdaBoost.loadDataSet("E:/TestDatas/MachineLearningInAction/Ch07/horseColicTraining2.txt")

# The third argument (10) is presumably the number of boosting iterations --
# confirm against AdaBoost.adaBoostTrainDS. Two values are unpacked from the
# result here.
classifierArr, aggClassExt = AdaBoost.adaBoostTrainDS(dataMat, labels, 10)

# The second training result is transposed before being handed to plotROC
# together with the labels.
AdaBoost.plotROC(aggClassExt.T, labels)
コード例 #2
0
from numpy import *
import AdaBoost
# Driver: train on the data returned by AdaBoost.loadSimpData() and classify
# two sample points.
datMat, classLabels = AdaBoost.loadSimpData()
# Disabled single-stump experiment, kept for reference:
# D=mat(ones((5,1))/5)
# print(AdaBoost.buildingStump(datMat,classLabels,D))
# NOTE(review): the training result is bound to a single name here, so this
# relies on the shape of whatever this project's adaBoostTrainDS returns --
# confirm it is what adaClassify expects as its second argument.
classEstArr = AdaBoost.adaBoostTrainDS(datMat, classLabels, 10)
# Classify the points [5, 5] and [0, 0] and print the result.
print(AdaBoost.adaClassify([[5, 5], [0, 0]], classEstArr))
コード例 #3
0
def loadDataSet(fileName):
    """Parse a tab-delimited file of floats into feature rows and labels.

    The number of columns is taken from the first line of the file. On every
    line the last field is the label and the first ``columns - 1`` fields are
    the features. Whitespace-only lines (for example a trailing empty line)
    are skipped instead of aborting the whole load.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list with one list
        of float features per data line; ``labelMat`` is the list of float
        labels in the same order. Both are empty for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a line has fewer fields than the first line requires.
    """
    dataMat = []
    labelMat = []

    # Read the file once, and make sure the handle is closed even on error.
    with open(fileName) as fr:
        lines = fr.readlines()
    if not lines:
        return dataMat, labelMat

    # Column count comes from the raw first line, as in the original code.
    numFeat = len(lines[0].split('\t'))

    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Blank line: nothing to parse.
            continue
        curLine = stripped.split('\t')
        # Index explicitly so a short line still raises IndexError rather
        # than being silently truncated.
        dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

# Load a horse-colic data file and train AdaBoost on it with 40 as the third
# argument (presumably the number of boosting iterations -- confirm).
# NOTE(review): the data used for training is read from 'horseColicTest2.txt',
# the same file the disabled evaluation code below loads as test data; this
# looks like it was meant to be the training file -- confirm.
datArr,labelArr = loadDataSet('horseColicTest2.txt')
weakClassArr,aggClassEst = AdaBoost.adaBoostTrainDS(datArr,labelArr,40)
'''
testDataArr,testLabelArr= loadDataSet('horseColicTest2.txt')
prediction10=AdaBoost.adaClassify(testDataArr,weakClassArr)
errArr = mat(ones((67,1)))
#统计错误数(错误率=错误数/67)
errArr[prediction10!=mat(testLabelArr).T].sum()
'''


'''非均衡分类问题
假设所有类别的分类代价,在大多数情况下不同类别的分类代价并不相等。
1.调节分类器的阈值
    一种不同分类器的评价方法:ROC曲线、AUC
    度量分类器性能的指标:构建一个同时使正确率和召回率最大的分类器是具有挑战性的。
'''
コード例 #4
0
# -*- coding: utf-8 -*-
"""

AdaBoost:简单数据集
@author: Jerry
"""
import numpy as np
import AdaBoost


def loadDataSet():
    """Build the hard-coded five-sample, two-feature toy data set.

    Returns:
        A ``(dataMat, classLabels)`` tuple: ``dataMat`` is a 5x2
        ``numpy.matrix`` of sample coordinates and ``classLabels`` is a list
        of five labels, each ``1.0`` or ``-1.0``, in the same row order.
    """
    samples = (
        [1., 2.1],
        [2., 1.1],
        [1.3, 1.],
        [1., 1.],
        [2., 1.],
    )
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return np.matrix(samples), classLabels


# Script entry point: train on the toy data set and classify a single point.
if __name__ == '__main__':
    dataMat, classLabels = loadDataSet()

    # Disabled earlier run that passed 9 as the third argument:
    #    AdaBoost.adaBoostTrainDS(dataMat,classLabels, 9)

    # Train with 30 as the third argument (presumably the iteration count --
    # confirm against AdaBoost.adaBoostTrainDS).
    classifierArray = AdaBoost.adaBoostTrainDS(dataMat, classLabels, 30)
    # Classify the point [0, 0] with the trained result and print the outcome.
    predictedLabel = AdaBoost.adaClassify([0, 0], classifierArray)
    print(predictedLabel)
コード例 #5
0
# -*- coding: utf-8 -*-
from numpy import *
import AdaBoost as ab
import LAB as lab

# Script entry point: build a training set from two groups of JPEG images,
# train AdaBoost on it, and print the parameters of every returned classifier.
if __name__ == '__main__':
    # Image groups in load order: (file-name prefix, class label).
    # Per the original comments, "B0" images are tennis balls (label 1.0)
    # and "B2" images are apples (label -1.0).
    imageGroups = (('B0', 1.), ('B2', -1.))
    imagesPerGroup = 5

    mylist = []   # one flattened feature list per image
    mytrain = []  # class label of each image, in the same order
    for prefix, label in imageGroups:
        for index in range(imagesPerGroup):
            arr = lab.load('%s_%d.jpg' % (prefix, index))
            arr2 = lab.LAB(arr).flatten()  # flatten after the conversion
            mylist.append(arr2.tolist())
            mytrain.append(label)

    dataArr = matrix(mylist)
    classLabels = matrix(mytrain)
    # The unused weight vector ``D`` from the original was dropped: it was
    # computed but never passed to anything.
    classifierArray = ab.adaBoostTrainDS(dataArr, classLabels, 10)
    for x in classifierArray:
        print('alpha:', x['alpha'], 'dim:', x['dim'], 'thresh:', x['thresh'],
              'ineq:', x['ineq'])
コード例 #6
0
    ax = plt.subplot(111)
    #画图
    for index in sortedIndicies.tolist()[0]:
        if classLabels[index] == 1.0:
            delX = 0; 
            delY = yStep;
        else:
            delX = xStep; 
            delY = 0;
            ySum += cur[1]
        ax.plot([cur[0],cur[0]-delX],[cur[1],cur[1]-delY], c='b')
        cur = (cur[0]-delX,cur[1]-delY)
    ax.plot([0,1],[0,1],'b--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve for AdaBoost horse colic detection system')
    ax.axis([0,1,0,1])
    plt.show()
    print("the Area Under the Curve is: ",ySum*xStep)



# Script entry point: train on the horse-colic training file, classify the
# test file, print the predictions, and plot an ROC curve from them.
if __name__ == '__main__':
    trainingMat,trainingLabels = AdaBoost.loadDataSet('horseColicTraining2.txt')
    # 10 is presumably the number of boosting iterations -- confirm against
    # AdaBoost.adaBoostTrainDS.
    classifierArray = AdaBoost.adaBoostTrainDS(trainingMat,trainingLabels, 10)
    
    testMat,testLabels = AdaBoost.loadDataSet('horseColicTest2.txt')
    prediction10 = AdaBoost.adaClassify(testMat, classifierArray)
    print(prediction10)
    
    # NOTE(review): plotROC receives the output of adaClassify here; verify
    # that this is the kind of score plotROC expects to rank samples by.
    plotROC(prediction10, testLabels)
コード例 #7
0
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 10 20:38:02 2018

@author: tf
"""

import AdaBoost
import numpy as np

#dataMat, labelMat = AdaBoost.loadDataSet()
#print(dataMat, '\n', labelMat)

#D = np.ones((5, 1)) / 5
#bestStump, minErr, bestClassEst = AdaBoost.buildStump(dataMat, labelMat, D)
#print(bestStump, '\n', minErr, '\n', bestClassEst)

#classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)
#print(max(0.1,0.2))

#clas = AdaBoost.adaClassify(np.array([[5, 5], [0, 0]]), classifierArr)
#print(clas)

# Train on the horse-colic training file; adaBoostTrainDS is called without an
# explicit third argument, so it relies on that function's own default.
dataMat, labelMat = AdaBoost.loadFileDataSet('horseColicTraining2.txt')
classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)

# Evaluate on the test file. In this project adaClassify is also given the
# test labels; the result is stored as ``errRate`` and printed (presumably an
# error rate -- confirm against AdaBoost.adaClassify).
testDataMat, testLabelMat = AdaBoost.loadFileDataSet('horseColicTest2.txt')
errRate = AdaBoost.adaClassify(testDataMat, classifierArr, testLabelMat)
print(errRate)