def testDigits(kTup=('rbf', 10)):
    """Train a kernel SVM on the digit images and print its error rates.

    The samples returned by ``loadImages('trainingDigits')`` are fitted with
    ``svmMLiA.smoP`` (fixed arguments 200, 0.0001, 10000).  The number of
    support vectors is printed, then every sample of 'trainingDigits' and of
    'testDigits' is classified using the support vectors only, and the
    fraction of sign mismatches is printed for each set.

    Args:
        kTup: Kernel description tuple, forwarded unchanged to
            ``svmMLiA.smoP`` and ``svmMLiA.kernelTrans``.

    Returns:
        None. All results are written to standard output.
    """
    dataArr, labelArr = loadImages('trainingDigits')
    b, alphas = svmMLiA.smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
    datMat = mat(dataArr)
    labelMat = mat(labelArr).transpose()

    # Keep only the samples whose multiplier is strictly positive.
    svInd = nonzero(alphas.A > 0)[0]
    sVs = datMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])

    # label * alpha per support vector does not depend on the sample being
    # classified, so it is computed once instead of once per sample.
    svWeights = multiply(labelSV, alphas[svInd])

    def errorRate(sampleMat, labels):
        """Return the fraction of rows whose predicted sign differs from its label."""
        m = shape(sampleMat)[0]
        errorCount = 0
        for i in range(m):
            kernelEval = svmMLiA.kernelTrans(sVs, sampleMat[i, :], kTup)
            predict = kernelEval.T * svWeights + b
            if sign(predict) != sign(labels[i]):
                errorCount += 1
        return float(errorCount) / m

    print("the training error rate is: %f" % errorRate(datMat, labelArr))

    dataArr, labelArr = loadImages('testDigits')
    print("the test error rate is: %f" % errorRate(mat(dataArr), labelArr))
def testRbf(k1=1.3):
    """Train an RBF-kernel SVM on 'testSetRBF.txt' and print its error rates.

    The model is fitted with ``svmMLiA.smoP`` (fixed arguments 200, 0.0001,
    10000).  The number of support vectors is printed, followed by the
    fraction of misclassified samples on 'testSetRBF.txt' (training) and on
    'testSetRBF2.txt' (test).

    Args:
        k1: Second element of the ``('rbf', k1)`` kernel tuple handed to
            ``svmMLiA.smoP`` and ``svmMLiA.kernelTrans``.

    Returns:
        None. All results are written to standard output.
    """
    kTup = ('rbf', k1)  # built once and shared by training and prediction
    dataArr, labelArr = svmMLiA.loadDataSet('testSetRBF.txt')
    b, alphas = svmMLiA.smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
    dataMat = np.mat(dataArr)
    labelMat = np.mat(labelArr).transpose()

    # Keep only the samples whose multiplier is strictly positive.
    svInd = np.nonzero(alphas.A > 0)[0]
    sVs = dataMat[svInd]
    labelSv = labelMat[svInd]
    print('there are %d Support Vectors' % np.shape(sVs)[0])

    # Loop-invariant: label * alpha for each support vector.
    svWeights = np.multiply(labelSv, alphas[svInd])

    def errorRate(sampleMat, labels):
        """Return the fraction of rows whose predicted sign differs from its label."""
        m = np.shape(sampleMat)[0]
        errorCount = 0
        for i in range(m):
            kernelEval = svmMLiA.kernelTrans(sVs, sampleMat[i, :], kTup)
            predict = kernelEval.T * svWeights + b
            if np.sign(predict) != np.sign(labels[i]):
                errorCount += 1
        return float(errorCount) / m

    print('the training error rate is: %f' % errorRate(dataMat, labelArr))

    dataArr, labelArr = svmMLiA.loadDataSet('testSetRBF2.txt')
    print('the test error rate is: %f' % errorRate(np.mat(dataArr), labelArr))
def testRbf(k1=100):
    """Train an RBF-kernel SVM, print train/test error rates and plot the test set.

    The model is fitted on '../data/test1.txt' with ``svm.smoP`` and evaluated
    on the same file (training error) and on '../data/test2.txt' (test error).
    Each test sample is drawn as a scatter series over its feature index:
    green when the predicted sign is positive, red otherwise.  The support
    vectors are overlaid in yellow before the figure is shown.

    The print calls use a single parenthesised argument so the function runs
    unchanged on Python 2 and Python 3.

    Args:
        k1: Second element of the ``('rbf', k1)`` kernel tuple handed to
            ``svm.smoP`` and ``svm.kernelTrans``.

    Returns:
        None. Results are printed and a matplotlib window is shown.
    """
    kTup = ('rbf', k1)
    dataArr, labelArr = svm.loadDataSet('../data/test1.txt')
    # Original author's note on the third argument: "C=200 important".
    b, alphas = svm.smoP(dataArr, labelArr, 200, 0.01, 10000, kTup)
    datMat = mat(dataArr)
    labelMat = mat(labelArr).transpose()

    svInd = nonzero(alphas.A > 0)[0]
    sVs = datMat[svInd]  # matrix of only the support vectors
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])

    # Loop-invariant: label * alpha for each support vector.
    svWeights = multiply(labelSV, alphas[svInd])

    # ---- training error ----
    m = shape(datMat)[0]
    errorCount = 0
    for i in range(m):
        kernelEval = svm.kernelTrans(sVs, datMat[i, :], kTup)
        predict = kernelEval.T * svWeights + b
        if sign(predict) != sign(labelArr[i]):
            errorCount += 1
    print("the training error rate is: %f" % (float(errorCount) / m))

    # ---- test error + plot ----
    dataArr, labelArr = svm.loadDataSet('../data/test2.txt')
    datMat = mat(dataArr)
    m, n = shape(datMat)
    # The x axis is the feature index.  It was hard-coded to 60 entries; using
    # the actual column count is identical for 60-feature data and keeps
    # scatter() from failing on other widths.
    featureAxis = range(n)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    errorCount = 0
    for i in range(m):
        kernelEval = svm.kernelTrans(sVs, datMat[i, :], kTup)
        predict = kernelEval.T * svWeights + b
        if sign(predict) != sign(labelArr[i]):
            errorCount += 1
        # Colour encodes the predicted class, not whether it was correct.
        colour = 'green' if sign(predict) > 0 else 'red'
        ax.scatter(featureAxis, array(datMat[i, :]), s=20, c=colour, marker='s')

    svAxis = range(shape(sVs)[1])
    for i in range(shape(sVs)[0]):
        ax.scatter(svAxis, array(sVs[i]), s=10, c='yellow', marker='s')
    plt.show()
    print("the test error rate is: %f" % (float(errorCount) / m))
def titanicBysvm(filename, k1=1.3):
    """Fit an RBF-kernel SVM on the data in *filename* and print its accuracy.

    ``loadDataSet(filename)`` supplies the training data, training labels,
    test data and test labels.  The model is trained with ``svm.smoP`` using
    C = 200, toler = 0.0001 and at most 10000 iterations.  The function prints
    the number of support vectors, the number of training errors, the training
    accuracy and the test accuracy.

    The linear weight vector formerly computed with ``svm.calcWs`` was never
    used by the kernel-based prediction, so that call is omitted.  The print
    calls use a single parenthesised argument so the function runs unchanged
    on Python 2 and Python 3.

    Args:
        filename: Passed unchanged to ``loadDataSet``.
        k1: Second element of the ``('rbf', k1)`` kernel tuple.

    Returns:
        None. All results are written to standard output.
    """
    kTup = ('rbf', k1)
    trainData, trainLabels, testData, testLabels = loadDataSet(filename)
    b, alphas = svm.smoP(trainData, trainLabels, 200, 0.0001, 10000, kTup)

    dataMat = mat(trainData)
    labelMat = mat(trainLabels).transpose()
    # svInd: indices of the support vectors; sVs: the support vectors
    # themselves; labelSV: their labels.
    svInd = nonzero(alphas.A > 0)[0]
    sVs = dataMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % shape(sVs)[0])

    # Loop-invariant: label * alpha for each support vector.
    svWeights = multiply(labelSV, alphas[svInd])

    def countErrors(sampleMat, labels):
        """Return (number of sign mismatches, number of rows) for sampleMat."""
        m = shape(sampleMat)[0]
        errors = 0
        for i in range(m):
            kernelEval = svm.kernelTrans(sVs, sampleMat[i, :], kTup)
            predict = kernelEval.T * svWeights + b
            if sign(predict) != sign(labels[i]):
                errors += 1
        return errors, m

    errorCount, m = countErrors(dataMat, trainLabels)
    print("the number of errors is %d" % errorCount)
    print("the training accuracy rate is: %f" % (1 - float(errorCount) / m))

    # Evaluate on the held-out test data.
    testErrorCount, m = countErrors(mat(testData), testLabels)
    print("the test accuracy rate is %.3f" % (1 - float(testErrorCount) / m))
import svmMLiA
from numpy import *

# Train on testSet.txt, derive the weight vector, then compare the decision
# value datMat[i] * ws + b with the stored label for the first three samples.
# Every print call has exactly one parenthesised argument, so the script
# produces the same output on Python 2 and Python 3.
dataArr, labelArr = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dataArr, labelArr, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dataArr, labelArr)
print(ws)
datMat = mat(dataArr)
wsMat = mat(ws)  # converted once instead of once per sample
for i in range(3):
    print(datMat[i] * wsMat + b)
    print(labelArr[i])
from numpy import *
import svmMLiA

# Fit the model on testSet.txt and recover the weight vector from the alphas.
dataArr, labelArr = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dataArr, labelArr, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dataArr, labelArr)
print(ws)

# Show the decision value next to the stored label for samples 0, 2 and 1,
# in that order.
dataMat = mat(dataArr)
for idx in (0, 2, 1):
    print(dataMat[idx] * mat(ws) + b)
    print(labelArr[idx])
iter: 30 i:17, pairs changed 1 j not moving enough iteration number: 0 j not moving enough iteration number: 1 b: matrix([[-3.8486163]]) alphas[alphas>0] = matrix([[0.09313378, 0.27456007, 0.04445935, 0.3232345 ]]) """ # To see which points of our dataset are support vectors for i in range(100): if alphas[i] > 0.0: print(dataArr[i], labelArr[i]) """ output: [4.658191, 3.507396] -1.0 [3.457096, -0.082216] -1.0 [2.893743, -1.643468] -1.0 [6.080573, 0.418886] 1.0 """ """ optimised SMOP """ b, alphas = SVM.smoP(dataArr, labelArr, 0.6, 0.001, 40) """ get alpha for supporting vectors """ ws = SVM.calcWs(alphas, dataArr, labelArr) """ test rbf kernel """ SVM.testRbf() """ testdigits classification using rbf kernel """ SVM.testDigits(('rbf', 20))
# Three independent walkthroughs of the svmMLiA helpers on testSet.txt.
# Bare expressions such as `alphas[alphas > 0]` only display a value when the
# lines are typed into an interactive interpreter; run as a script they are
# evaluated and discarded.
# NOTE(review): `shape` and `mat` are used without a visible import here;
# presumably `from numpy import *` was executed beforehand; confirm.

# --- 1. Simplified SMO: list and plot the support vectors ---
import svmMLiA
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoSimple(dm, ls, 0.6, 0.001, 40)  # simplified SMO algorithm
alphas[alphas > 0]
shape(alphas[alphas > 0])
svMat = []
# NOTE(review): 100 is hard-coded; presumably the number of rows in
# testSet.txt; confirm.
for i in range(100):
    if alphas[i] > 0.0:
        svMat.append(dm[i])
        print dm[i], ls[i]  # support vectors
svmMLiA.plot(dm, ls, svMat)

# --- 2. smoP: compute the weight vector and classify sample 0 ---
import svmMLiA
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dm, ls, 0.6, 0.001, 40)
ws = svmMLiA.calcWs(alphas, dm, ls)
mat(dm)[0] * mat(ws) + b  # classify sample 0
ls[0]

# --- 3. smoP: collect the support vectors and plot them with ws and b ---
import svmMLiA
dm, ls = svmMLiA.loadDataSet('testSet.txt')
b, alphas = svmMLiA.smoP(dm, ls, 0.6, 0.001, 40)
svMat = []
for i in range(100):
    if alphas[i] > 0.0:
        svMat.append(dm[i])
ws = svmMLiA.calcWs(alphas, dm, ls)
svmMLiA.plot(dm, ls, svMat, ws, float(b))
#coding=utf-8
import svmMLiA as svm
import functionsCV as cv
import cv2
from saveData import *
from numpy import *

# Training script: fit an RBF-kernel SVM on the 'trainingDigits' images and
# hand the resulting model pieces to storeData together with a file name.
# Every print call has exactly one parenthesised argument, so the script
# produces the same output on Python 2 and Python 3.

# Step 1: extract the training samples.
print("Get samples...")
dataArr, labelArr = svm.loadImages('trainingDigits')
print("Get samples successfully!")

# Step 2: train.
print("Start training...")
b, alphas = svm.smoP(dataArr, labelArr, 200, 0.0001, 10000, ('rbf', 10))
storeData(b, 'b.txt')
storeData(alphas, 'alphas.txt')
print("Training completed!")

# Step 3: locate the support vectors (samples with a strictly positive alpha).
dataMat = mat(dataArr)
labelMat = mat(labelArr).transpose()
svInd = nonzero(alphas.A > 0)[0]  # indices of the support vectors
sVs = dataMat[svInd]              # the support vectors
labelSV = labelMat[svInd]         # labels of the support vectors
storeData(svInd, 'svInd.txt')
storeData(sVs, 'sVs.txt')
storeData(labelSV, 'labelSV.txt')
def test_smo_all():
    """Run ``svmMLiA.smoP`` on 'testSet.txt' and print ``b`` and the positive alphas."""
    samples, targets = svmMLiA.loadDataSet('testSet.txt')
    solution = svmMLiA.smoP(samples, targets, 0.6, 0.001, 40)
    b, alphas = solution
    print(b)
    # Boolean-mask selection keeps only the strictly positive multipliers.
    positiveMask = alphas > 0
    print(alphas[positiveMask])
def test_4():
    """Run ``svmMLiA.smoP`` on 'testSet.txt' and list the support vectors.

    Prints the shape of the selection of strictly positive alphas, then one
    line per sample whose alpha is positive, holding the sample followed by
    its label.

    The sample loop runs over the first dimension of ``alphas`` instead of a
    hard-coded 100, so it no longer assumes a particular data-set size.  The
    print calls use a single parenthesised argument so the function produces
    the same output on Python 2 and Python 3.
    """
    dataArr, labelArr = svmMLiA.loadDataSet('testSet.txt')
    b, alphas = svmMLiA.smoP(dataArr, labelArr, 0.6, 0.001, 40)
    print(shape(alphas[alphas > 0]))
    for i in range(shape(alphas)[0]):
        if alphas[i] > 0.0:
            # "%s %s" reproduces the space-separated output of `print a, b`.
            print("%s %s" % (dataArr[i], labelArr[i]))