# Predict each target column of yMat for the single query row xCheckMat.
# Relies on names defined outside this fragment: xMat, yMat, xCheckMat,
# bestKList, tTSize, predictHours and the project-local `regression` module.
xMat = mat(xMat, dtype=float)
yMat = mat(yMat, dtype=float)

# Regularize every feature column except column 0, then apply the same
# mean/std to the query row so both live on the same scale.
xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans) / xStd

preDictPerComponent = []
for yIdx in range(0, 2):
    # Original note: column 0 is the computing result, column 1 the
    # communication result.
    yMatTmp = yMat[:, yIdx]
    taskPredict = 0
    gotPrediction = False
    for kElement in bestKList:
        taskPredict = 0
        # Get the result for the query row with this kernel width.
        wr = regression.lwlr(xCheckMat, xMat, yMatTmp.T, kElement)
        # wr = regression.standRegres(xMat,yMatTmp.T)
        # Identity test: `wr != None` on a matrix is an elementwise
        # comparison and is not a reliable truth value.
        if wr is not None:
            taskPredict = xCheckMat * wr
            gotPrediction = True
            # Stop at the first k that yields a usable result.
            break
    # An explicit flag replaces `taskPredict != 0`, which was an elementwise
    # matrix comparison and also rejected a legitimate prediction of 0.
    if gotPrediction:
        # reshape(-1).tolist() on a matrix gives a nested list; flatten it
        # so only plain values are collected.
        tmp = taskPredict.reshape(-1).tolist()
        tmp = [j for i in tmp for j in i]
        preDictPerComponent.extend(tmp)
    else:
        print("Error! Task Size: %d, Predict Hours: %d Task %d can not get predict value" % (tTSize, predictHours, yIdx))
# Load the comma-separated training data (two features, one target per row)
# and predict the target for one hand-written query row.
xMat = []
yMat = []
# `with` guarantees the file is closed; the original never closed it.
with open("/home/yu/workspace/machine-learning-class/ex1/ex1data2.txt") as fr:
    for line in fr:
        tmpEle = line.split(',')
        # Leading 1 is the constant (intercept) column.
        xMat.append([1, tmpEle[0], tmpEle[1]])
        yMat.append([tmpEle[2]])

# Other kernel widths that were tried:
# [0.07, 0.3, 0.1, 0.7, 3, 10, 28, 40, 60, 80, 100]
# [100, 80, 60, 40, 28, 10, 3, 0.7, 0.3, 0.07, 0.01]
bestKList = [10]

xCheckMat = mat([1, 1650, 3], dtype=float)
xMat = mat(xMat, dtype=float)
yMat = mat(yMat, dtype=float)

# Regularize every feature column except column 0, then apply the same
# mean/std to the query row.
xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans) / xStd

for kElement in bestKList:
    taskPredict = 0
    # Get the result for the query row with this kernel width.
    wr = regression.lwlr(xCheckMat, xMat, yMat.T, kElement)
    # wr = regression.standRegres(xMat,yMat.T)
    # Identity test: `wr != None` on a matrix compares elementwise.
    if wr is not None:
        print(wr)
        taskPredict = xCheckMat * wr
        print(taskPredict)
        # Stop at the first k that yields a usable result.
        break
# # plt.show()
# #
# # Correlation coefficient
# # print('corrcoef:');print(corrcoef(yHat.T,yMat))  # how well predictions match the true values
# # corrcoef:
# # [[ 1.          0.13653777]
# #  [ 0.13653777  1.        ]]
# # end: Linear Regression

xArr, yArr = regression.loadDataSet('ex0.txt')

# Estimate a single point and print the prediction.
print('xArr[0]:')
print(xArr[0])
# xArr[0]:
# [1.0, 0.067732]
print(yArr[0])  # output: 3.176513
print(regression.lwlr(xArr[0], xArr, yArr, 1.0))  # output: matrix([[ 3.12204471]])
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))  # output: matrix([[ 3.20175729]])

# lwlrTest() gives the estimate for every point in the data set.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
print('all points about yHat:')
print(yHat)

# Inspect the fit. The curve has to be drawn in ascending x order, so sort
# the rows by the feature in column 1 first.
xMat = mat(xArr)
xCol = xMat[:, 1]
srtInd = xCol.argsort(0)
xSort = xMat[srtInd][:, 0, :]

# Plot: fitted curve plus the raw samples as small red dots.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
scatterX = xCol.flatten().A[0]
scatterY = mat(yArr).T.flatten().A[0]
ax.scatter(scatterX, scatterY, s=2, c='red')
yHat = xMat * ws

# Draw the data set as a scatter plot together with the best-fit line.
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])

# The fitted line is drawn from yHat. If the points along the line are out
# of order the plot comes out wrong, so sort them in ascending order first.
xCopy = xMat.copy()
xCopy.sort(0)  # in-place sort along axis 0
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Estimate a single point. The parenthesised print form behaves the same on
# Python 2 and is also valid Python 3.
print(yArr[0])
print(regression.lwlr(xArr[0], xArr, yArr, 1.0))
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))

# Estimate every point, then plot the curve in ascending x order over the
# raw samples (small red dots).
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2,
           c='red')
plt.show()
# Scatter the raw samples and overlay the fitted line (`fig`, `xMat`, `yMat`
# and `ws` come from earlier in the session).
ax = fig.add_subplot(111)
rawX = xMat[:, 1].flatten().A[0]
rawY = yMat.T[:, 0].flatten().A[0]
ax.scatter(rawX, rawY)
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Judge the model quality.
yHat = xMat * ws
# Read the [0,1] and [1,0] entries; the diagonal is each series correlated
# with itself and is therefore always 1.
corrcoef(yHat.T, yMat)

# Try locally weighted linear regression.
reload(regression)
xArr, yArr = regression.loadDataSet('ex0.txt')
# Single-point checks with two kernel widths.
regression.lwlr(xArr[0], xArr, yArr, 1.0)
regression.lwlr(xArr[0], xArr, yArr, 0.001)
# Predict all of xArr; vary k to see different fits.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.5)

# Plot the estimates against the original values.
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
sampleX = xMat[:, 1].flatten().A[0]
sampleY = mat(yArr).T.flatten().A[0]
ax.scatter(sampleX, sampleY, s=2, c='red')
plt.show()

# On real data.
reload(regression)
# Scatter the raw samples and overlay the fitted line (`fig`, `xMat`, `yMat`,
# `ws` and `homedir` come from earlier in the script).
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

yHat = xMat * ws
# Each print takes one pre-formatted argument so the call gives the same
# text on Python 2 and is valid on Python 3.
print("corrcoef(yHat.T,yMat): %s" % (corrcoef(yHat.T, yMat),))

# 8.2 Locally weighted linear regression
xArr, yArr = regression.loadDataSet(homedir + 'ex0.txt')
print("yArr[0]: %s" % (yArr[0],))
print("xArr[0]: %s" % (xArr[0],))
print("regression.lwlr(xArr[0],xArr,yArr,1.0): %s" % (
    regression.lwlr(xArr[0], xArr, yArr, 1.0),))
print("regression.lwlr(xArr[0],xArr,yArr,0.001): %s" % (
    regression.lwlr(xArr[0], xArr, yArr, 0.001),))
# The original ended this print with a trailing comma and no value.
print(":")
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.02)

# Sort rows by the feature in column 1 so the curve is drawn left to right.
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
# print "xMat",xMat
# print "srtInd:",srtInd
# print "xSort:",xSort
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2,
           c='red')
plt.show()
# # import matplotlib.pyplot as plt # # fig=plt.figure() # ax=fig.add_subplot(111) # # ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0]) # # xCopy=xMat.copy(); # xCopy.sort(0) # yHat=xCopy*ws # # ax.plot(xCopy[:,1],yHat) # plt.show() ws = regression.lwlr(xArr[0], xArr, yArr, 1.0) print(ws) yHat = regression.lwlrTest(xArr, xArr, yArr, 0.03) xMat = mat(xArr) srtInd = xMat[:, 1].argsort(0) xSort = xMat[srtInd][:, 0, :] import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T[:, 0].flatten().A[0], s=2,
fig = plt.figure()  # create the canvas
ax = fig.add_subplot(211)
# ax = fig.add_subplot(349): the argument 349 means the canvas is split into
# 3 rows and 4 columns and the plot goes in cell 9, counting left to right,
# top to bottom. 3410 does not work; use the tuple form (3, 4, 10) instead.
# ax = fig.add_subplot(2,1,1)
# ax.plot(x,y)
# ax = fig.add_subplot(2,2,3)
# ax.plot(x,y)
# plt.show()  shows multiple plots

# Top panel: raw samples plus the fitted line, drawn in ascending x order.
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)

# Correlate predictions with targets row by row. yHat above was computed from
# the sorted copy, so its rows no longer line up with yMat; predict from the
# unsorted xMat here. The transpose makes both arguments row vectors.
print(corrcoef((xMat * ws).T, yMat))

xArrL, yArrL = regression.loadDataSet('ex0.txt')
yArrL[0]
regression.lwlr(xArrL[0], xArrL, yArrL, 1.0)
yHatL = regression.lwlrTest(xArrL, xArrL, yArrL, 0.01)
xMatL = mat(xArrL)
yMatL = mat(yArrL)
srtInd = xMatL[:, 1].argsort(0)
xSort = xMatL[srtInd][:, 0, :]

# Bottom panel: raw samples plus the locally weighted fit.
ax = fig.add_subplot(2, 1, 2)
ax.scatter(xMatL[:, 1].flatten().A[0], yMatL.T[:, 0].flatten().A[0])
ax.plot(xSort[:, 1], yHatL[srtInd])
plt.show()
print(corrcoef(yHatL.T, yMatL))
import regression

# Load the data set (loadDataSet is called with its default arguments) and
# compare a single locally weighted estimate with the true value.
xArr, yArr = regression.loadDataSet()
# print(xArr[0:2])
w = regression.standRegres(xArr, yArr)
# print(w)
print(yArr[0], regression.lwlr(xArr[0], xArr, yArr, 0.001))

# Estimate every point.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
print(yHat)

# np.asmatrix is used throughout: the original mixed `np.mat` with a bare
# `mat`, and `np.mat` no longer exists in NumPy 2.0.
xMat = np.asmatrix(xArr)
# argsort(0) gives the row order that sorts column 1 ascending.
srtInd = xMat[:, 1].argsort(0)
# Indexing with that column of indices adds an axis; [:, 0, :] drops it
# again, leaving the rows of xMat in sorted order.
xSort = xMat[srtInd][:, 0, :]

# Fitted curve in ascending x order over the raw samples (small red dots).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], np.asmatrix(yArr).T.flatten().A[0],
           s=2, c='red')
plt.show()
# Finish the straight-line plot (`xCopy`, `ws`, `ax`, `dm` and `ls` come from
# earlier in the session).
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

xMat = mat(dm)
yMat = mat(ls)
yHat = xMat * ws
# Compute the correlation coefficient.
corrcoef(yHat.T, yMat)

# Locally weighted linear regression
import regression
from numpy import *

dm, ls = regression.loadDataSet('ex0.txt')
ls[0]
regression.lwlr(dm[0], dm, ls, 1.0)
yHat = regression.lwlrTest(dm, dm, ls, 0.01)

# Sort rows by the feature in column 1 so the curve is drawn left to right.
xMat = mat(dm)
strInd = xMat[:, 1].argsort(0)
xSort = xMat[strInd][:, 0, :]

import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[strInd])
featureVals = xMat[:, 1].flatten().A[0]
targetVals = mat(ls).T.flatten().A[0]
ax.scatter(featureVals, targetVals, s=2, c='red')
plt.show()

# Predict the age of abalone
import regression
from numpy import *
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'8.2'

__author__ = 'lxp'

import regression
import numpy as np
import matplotlib.pyplot as plt

# Load the samples and print single-point estimates for two kernel widths.
xArr, yArr = regression.loadDataSet('ex0.txt')
print(regression.lwlr(xArr[0], xArr, yArr, 1))
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))

# Estimate every point.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0; both wrap the
# input as a matrix.
xMat = np.asmatrix(xArr)
# Sort rows by the feature in column 1 so the curve is drawn left to right.
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]

# Fitted curve over the raw samples (small red dots).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], np.asmatrix(yArr).T.flatten().A[0],
           s=2, c='red')
plt.show()
# Walk through the samples in order, predicting each one from the samples
# seen so far and appending the result to 'workfile'.
xArr_origin, yArr_origin = regression.loadDataSet("foo.txt")
# print(yArr_origin)
m = (shape(xArr_origin))[0]
yHat = zeros(m)

# The first sample has no history, so its "prediction" is its own target.
xArr = [xArr_origin[0][:]]
yArr = [yArr_origin[0]]
yHat[0] = yArr_origin[0]

for i in range(1, m):
    # k = best_k(xArr, yArr)
    k = 0.06830078125
    x = xArr_origin[i][:]
    y = regression.lwlr(x, xArr, yArr, k)
    # print(y.flatten().A[0][0])
    yHat[i] = y.flatten().A[0][0]
    # Widen the kernel in 0.01 steps until the estimate becomes positive.
    while yHat[i] <= 0:
        k += 0.01
        y = regression.lwlr(x, xArr, yArr, k)
        yHat[i] = y.flatten().A[0][0]
    # Only now does the current sample join the training history.
    xArr.append(x)
    yArr.append(yArr_origin[i])
    print(i, k, yHat[i], yArr[i])
    # Append index, kernel width and estimate as tab-separated fields.
    with open('workfile', 'a') as f:
        f.write("\t".join([str(i), str(k), str(yHat[i])]))
# Standard regression: predict, plot the samples with the fitted line, and
# print the correlation between predictions and targets.
xMat, yMat = mat(xArr), mat(yArr)
yHat = xMat * ws
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# Sort a copy so the line is drawn in ascending x order.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Recompute from the unsorted rows so predictions line up with yMat. The
# parenthesised print form behaves the same on Python 2 and is valid Python 3.
yHat = xMat * ws
print(corrcoef(yHat.T, yMat))

# Locally weighted regression on ex0.txt.
xArr, yArr = regression.loadDataSet('ex0.txt')
print(yArr[0])
print(regression.lwlr(xArr[0], xArr, yArr, 1.0))
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.01)
# k = 1.0 matches standard regression (underfits)
# k = 0.003 overfits

xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
# import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2,
           c='red')
plt.show()
# wx = reg.standRegres(xArr, yArr)
# print wx
#
# xMat = mat(xArr)
# yMat = mat(yArr)
#
# yHat = xMat * wx
# corrcoef(yHat.H, yMat)
#
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * wx
# ax.plot(xCopy[:, 1], yHat)
# plt.show()

# Single-point estimates for two kernel widths. The parenthesised print form
# behaves the same on Python 2 and is valid Python 3.
print(reg.lwlr(xArr[0], xArr, yArr, 1.0))
print(reg.lwlr(xArr[0], xArr, yArr, 0.001))

# Estimate every point, then sort rows by the feature in column 1 so the
# curve is drawn left to right.
yHat = reg.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]

# Fitted curve over the raw samples (small red dots).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2,
           c='red')
plt.show()