def _rbfErrorRate(dataArr, labelArr, sVs, svWeights, b, kTup):
    """Return the fraction of rows in dataArr whose predicted sign is wrong.

    Args:
        dataArr: samples to classify; converted with mat() before use.
        labelArr: one label per row of dataArr, compared by sign.
        sVs: matrix holding only the support vectors.
        svWeights: element-wise product of support-vector labels and alphas.
        b: offset added to the kernel-weighted sum.
        kTup: kernel description tuple forwarded to kernelTrans.

    Returns:
        Misclassified row count divided by the number of rows, as a float.
    """
    datMat = mat(dataArr)
    m = shape(datMat)[0]
    errorCount = 0
    for i in range(m):
        kernelEval = kernelTrans(sVs, datMat[i, :], kTup)
        predict = kernelEval.T * svWeights + b
        if sign(predict) != sign(labelArr[i]):
            errorCount += 1
    return float(errorCount) / m


def testRbf(k1=1.3):
    """Train an RBF-kernel SVM on testSetRBF.txt, report errors and plot it.

    Prints the number of support vectors, the training error rate and the
    error rate on testSetRBF2.txt, then shows a scatter plot of the training
    set: support vectors in blue, remaining positive labels in red and
    remaining non-positive labels in green.

    Args:
        k1: second element of the ('rbf', k1) kernel tuple handed to smoP
            and kernelTrans.
    """
    kTup = ('rbf', k1)
    trainArr, trainLabels = svmMLiA.loadDataSet('testSetRBF.txt')
    b, alphas = smoP(trainArr, trainLabels, 200, 0.0001, 10000, kTup)  # C=200 important
    datMat = mat(trainArr)
    labelMat = mat(trainLabels).transpose()
    svInd = nonzero(alphas.A > 0)[0]
    sVs = datMat[svInd]  # get matrix of only support vectors
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])

    # The label * alpha weights do not depend on the sample being classified,
    # so compute them once instead of once per row.
    svWeights = multiply(labelSV, alphas[svInd])
    trainError = _rbfErrorRate(trainArr, trainLabels, sVs, svWeights, b, kTup)
    print("the training error rate is: %f" % trainError)

    # Keep the test set in its own names: alphas are indexed by training row,
    # so the plot below must still be able to reach the training points.
    testArr, testLabels = svmMLiA.loadDataSet('testSetRBF2.txt')
    testError = _rbfErrorRate(testArr, testLabels, sVs, svWeights, b, kTup)
    print("the test error rate is: %f" % testError)

    x0 = []
    y0 = []
    x1 = []
    y1 = []
    x_sv = []
    y_sv = []
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in range(shape(datMat)[0]):
        if alphas[i] > 0.0:
            x_sv.append(trainArr[i][0])
            y_sv.append(trainArr[i][1])
        elif trainLabels[i] > 0.0:
            x0.append(trainArr[i][0])
            y0.append(trainArr[i][1])
        else:
            x1.append(trainArr[i][0])
            y1.append(trainArr[i][1])
    ax.scatter(x0, y0, color='r')
    ax.scatter(x1, y1, color='g')
    ax.scatter(x_sv, y_sv, color='b')
    plt.show()
def _countKernelErrors(sVs, svWeights, b, dataArr, labelArr, kTup):
    """Count the rows of dataArr whose predicted sign differs from the label.

    Args:
        sVs: matrix holding only the support vectors.
        svWeights: element-wise product of support-vector labels and alphas.
        b: offset added to the kernel-weighted sum.
        dataArr: samples to classify; converted with mat() before use.
        labelArr: one label per row of dataArr, compared by sign.
        kTup: kernel description tuple forwarded to svm.kernelTrans.

    Returns:
        A (errorCount, rowCount) tuple.
    """
    dataMat = mat(dataArr)
    m = shape(dataArr)[0]
    errorCount = 0
    for i in range(m):
        kernelEval = svm.kernelTrans(sVs, dataMat[i, :], kTup)
        predict = kernelEval.T * svWeights + b
        if sign(predict) != sign(labelArr[i]):
            errorCount += 1
    return errorCount, m


def titanicBysvm(filename, k1=1.3):
    """Train an RBF-kernel SVM on the data in filename and print accuracies.

    loadDataSet(filename) is expected to return four values: training data,
    training labels, test data and test labels. The function prints the
    number of support vectors, the number of training errors, the training
    accuracy and the test accuracy.

    Args:
        filename: passed straight to loadDataSet.
        k1: second element of the ('rbf', k1) kernel tuple handed to
            svm.smoP and svm.kernelTrans.
    """
    trainingdataarr, traininglabelarr, testdataarr, testlaeblarr = loadDataSet(filename)
    kTup = ('rbf', k1)
    # C = 200 , toler = 0.0001 , maxiteration = 10000
    b, alphas = svm.smoP(trainingdataarr, traininglabelarr, 200, 0.0001, 10000, kTup)
    dataMat = mat(trainingdataarr)
    labelmat = mat(traininglabelarr).transpose()
    # svInd is the index of support vector, sVs is the support vectors,
    # labelSV is the support vector labels
    svInd = nonzero(alphas.A > 0)[0]
    sVs = dataMat[svInd]
    labelSV = labelmat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])

    # Loop-invariant: the label * alpha weights are the same for every sample.
    svWeights = multiply(labelSV, alphas[svInd])

    errorCount, m = _countKernelErrors(
        sVs, svWeights, b, trainingdataarr, traininglabelarr, kTup)
    print("the number of errors is %d" % errorCount)
    print("the training accuracy rate is: %f" % (1 - float(errorCount) / m))

    # test data running
    testErrorCount, m = _countKernelErrors(
        sVs, svWeights, b, testdataarr, testlaeblarr, kTup)
    print("the test accuracy rate is %.3f" % (1 - float(testErrorCount) / m))
def printsvm(Mat, label, alphas, b):
    """Scatter-plot the samples and draw the linear separating line.

    Samples whose alpha is positive (the support vectors) are drawn as red
    circles; all other samples use the default scatter style. The weight
    vector is obtained from svmMLiA.calcWs and the line
    w0 * x + w1 * y + b = 0 is plotted on the same axes.

    Args:
        Mat: iterable of samples; element 0 and 1 of each are used as x / y.
        label: labels forwarded to svmMLiA.calcWs.
        alphas: per-sample multipliers, indexed in the same order as Mat.
        b: offset of the separating line; converted with float().
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    print('to print scatter')
    for i, dot in enumerate(Mat):
        if alphas[i] > 0.0:
            ax.scatter(dot[0], dot[1], c='r', marker='o')
        else:
            ax.scatter(dot[0], dot[1])
    print('to print line')
    ws = svmMLiA.calcWs(alphas, Mat, label)
    # arange takes start, stop, step as separate arguments; slice syntax is
    # not valid inside a call.
    x = arange(0, 3, 2)
    print('x= %s' % (x,))
    # Solve w0 * x + w1 * y + b = 0 for y.
    # assumes ws holds exactly two weights and ws[1] is non-zero - TODO confirm
    y = -(float(ws[0]) * x + float(b)) / float(ws[1])
    print(y)
    ax.plot(x, y)
    plt.show()
import svmMLiA
from numpy import *

# Train on testSet.txt with the full SMO routine, derive the weight vector
# and compare the raw decision value of the first three samples with their
# labels.
dataArr, labelArr = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dataArr, labelArr, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dataArr, labelArr)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(ws)
datMat = mat(dataArr)
wsMat = mat(ws)
for i in range(3):
    # decision value for sample i, followed by its true label
    print(datMat[i] * wsMat + b)
    print(labelArr[i])
from numpy import *
import svmMLiA

# Fit the SVM on testSet.txt and recover the weight vector from the alphas.
dataArr, labelArr = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dataArr, labelArr, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dataArr, labelArr)
print(ws)

# Show the decision value next to the true label for samples 0, 2 and 1
# (in that order).
dataMat = mat(dataArr)
for row in (0, 2, 1):
    print(dataMat[row] * mat(ws) + b)
    print(labelArr[row])
def fit(self, dataArr, LabelArr):
    """Run smoPK on the samples and store the resulting model on self.

    Uses the instance's C, toler and maxIter settings. After the call,
    self.b and self.alphas hold the two values returned by smoPK and
    self.weights holds the result of calcWs for those alphas.

    Args:
        dataArr: training samples, forwarded to smoPK and calcWs.
        LabelArr: training labels, forwarded to smoPK and calcWs.
    """
    solution = smoPK(dataArr, LabelArr, self.C, self.toler, self.maxIter)
    self.b, self.alphas = solution
    # Read the alphas back from the instance, exactly as stored.
    self.weights = calcWs(self.alphas, dataArr, LabelArr)
iter: 30 i:17, pairs changed 1 j not moving enough iteration number: 0 j not moving enough iteration number: 1 b: matrix([[-3.8486163]]) alphas[alphas>0] = matrix([[0.09313378, 0.27456007, 0.04445935, 0.3232345 ]]) """ # To see which points of our dataset are support vectors for i in range(100): if alphas[i] > 0.0: print(dataArr[i], labelArr[i]) """ output: [4.658191, 3.507396] -1.0 [3.457096, -0.082216] -1.0 [2.893743, -1.643468] -1.0 [6.080573, 0.418886] 1.0 """ """ optimised SMOP """ b, alphas = SVM.smoP(dataArr, labelArr, 0.6, 0.001, 40) """ get alpha for supporting vectors """ ws = SVM.calcWs(alphas, dataArr, labelArr) """ test rbf kernel """ SVM.testRbf() """ testdigits classification using rbf kernel """ SVM.testDigits(('rbf', 20))
def fit(self, dataMatIn, classLabels, C=0.6, toler=0.001, maxIter=40):
    """Run smoPK on the samples and return the fitted parameters.

    The three tuning values used to be hard-coded; they are now keyword
    parameters whose defaults reproduce the previous behaviour.

    Args:
        dataMatIn: training samples, forwarded to smoPK and calcWs.
        classLabels: training labels, forwarded to smoPK and calcWs.
        C: third positional argument of smoPK.
        toler: fourth positional argument of smoPK.
        maxIter: fifth positional argument of smoPK.

    Returns:
        A (b, alphas, ws) tuple: the two values returned by smoPK and the
        result of calcWs for those alphas.
    """
    b, alphas = smoPK(dataMatIn, classLabels, C, toler, maxIter)
    ws = calcWs(alphas, dataMatIn, classLabels)
    return b, alphas, ws
# --- Session 1: simplified SMO, list and plot the support vectors ---------
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoSimple(dm, ls, 0.6, 0.001, 40)  # simplified SMO algorithm
# The two bare expressions below only display a value in an interactive
# session; they have no effect when the file is run as a script.
alphas[alphas > 0]
shape(alphas[alphas > 0])
svMat = []
# Iterate over every loaded sample rather than a fixed count of 100.
for i in range(len(dm)):
    if alphas[i] > 0.0:
        svMat.append(dm[i])
        # support vector; %-formatting gives identical output on Python 2 and 3
        print("%s %s" % (dm[i], ls[i]))
svmMLiA.plot(dm, ls, svMat)

# --- Session 2: full SMO, classify the first sample ------------------------
import svmMLiA
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dm, ls, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dm, ls)
mat(dm)[0] * mat(ws) + b  # classify sample 0 (visible only interactively)
ls[0]

# --- Session 3: full SMO, plot support vectors and the separating line -----
import svmMLiA
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dm, ls, 0.6, 0.001, 40)
svMat = [dm[i] for i in range(len(dm)) if alphas[i] > 0.0]
ws = svmMLiA.calcWs(alphas, dm, ls)
svmMLiA.plot(dm, ls, svMat, ws, float(b))

import svmMLiA
if sign(predict)!=sign(labelArr[i]): errorCount += 1 print("the training error rate is: %f" % (float(errorCount)/m)) '''进行测试''' dataArr,labelArr = loadDataSet('testSetRBF2.txt') errorCount = 0 datMat=mat(dataArr); labelMat = mat(labelArr).transpose() m,n = shape(datMat) for i in range(m): kernelEval = kernelTrans(sVs,datMat[i,:],('rbf', k1)) predict=kernelEval.T * multiply(labelSV,oS.alphas[svInd]) + oS.b #加权求和,结果为一个数 if sign(predict)!=sign(labelArr[i]): errorCount += 1 print("the test error rate is: %f" % (float(errorCount)/m) ) '''result''' ws = calcWs(oS.alphas,dataArr,labelArr) print( 'aphas = ' + str(oS.alphas[oS.alphas>0]) ) print( 'b = ' + str(oS.b) ) print( 'w = ' + str(ws.T) ) a=oS.eCache al = oS.alphas #dataMatIn[0]*mat(ws)+b #判断一个数据点的分类 '''plot''' xcord0 = [];ycord0 = [] xcord1 = [];ycord1 = [] fr = open(filename)#this file was generated by 2normalGen.py for line in fr.readlines(): '''操作list类型读取txt''' lineSplit = line.strip().split('\t') xPt = float(lineSplit[0])
alphaPairsChanged += innerL(i, oS) print("fullSet, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)) iter += 1 else: #go over non-bound (railed) alphas nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0] for i in nonBoundIs: alphaPairsChanged += innerL(i, oS) print("non-bound, iter: %d i:%d, pairs changed %d" % (iter, i, alphaPairsChanged)) iter += 1 if entireSet: entireSet = False #toggle entire set loop elif (alphaPairsChanged == 0): entireSet = True print("iteration number: %d" % iter) '''result''' ws = calcWs(oS.alphas, dataMatIn, classLabels) print('aphas = ' + str(oS.alphas[oS.alphas > 0])) print('b = ' + str(oS.b)) print('w = ' + str(ws.T)) a = oS.eCache #dataMatIn[0]*mat(ws)+b #判断一个数据点的分类 plotResult(filename, dataMatIn, classLabels, oS, ws) svmData = [] svmLabel = [] m, n = shape(dataMatIn) for i in range(m): if oS.alphas[i] > 0.0: svmData.append(dataMatIn[i]) svmLabel.append(classLabels[i])