def testGaussWeight():
    """Plot the ex0 data and the Gaussian LWLR weight curves for k=0.5, 0.1, 0.01."""
    xArr, yArr = re.loadDataSet('ex0.txt')
    xMat = mat(xArr)
    yMat = mat(yArr)
    fig = plt.figure()
    ax = fig.add_subplot(411)  # 4 rows x 1 column; first panel shows the raw data
    plt.yticks(linspace(2.5, 5, 6))
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0], c='purple', label='realData', marker='.')
    xMat.sort(0)  # sort along axis 0 so the weight curves plot left-to-right
    weights = re.getGaussWeightArr([1, 0.5], xMat, 0.5)  # weights around query point [1, 0.5], k=0.5
    ax = fig.add_subplot(412)
    plt.yticks(linspace(0.6, 1, 9))
    ax.plot(xMat[:, 1], weights)
    plt.text(0.7, 0.75, r'$k=0.5$')
    weights = re.getGaussWeightArr([1, 0.5], xMat, 0.1)
    ax = fig.add_subplot(413)
    plt.yticks(linspace(0, 1, 6))
    ax.plot(xMat[:, 1], weights)
    plt.text(0.7, 0.5, r'$k=0.1$')
    weights = re.getGaussWeightArr([1, 0.5], xMat, 0.01)
    ax = fig.add_subplot(414)
    plt.yticks(linspace(0, 1, 6))
    ax.plot(xMat[:, 1], weights, label='k=0.01')
    plt.text(0.7, 0.5, r'$k=0.01$')
    plt.show()
def main():
    """Compare LWLR kernels k=0.1, 1, 10 on the abalone training and held-out slices (Python 2)."""
    abX, abY = reg.loadDataSet('abalone.txt')
    print '------------------------training----------------------'
    # predictions on the training slice itself, for three kernel widths
    yHat01 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[0:99], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[0:99], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[0:99], yHat10.T)
    # the result shows the smaller kernel fits the training data better
    print "yHat01:", yHat01, "error01:", error01
    print "yHat1", yHat1, "error1:", error1
    print "yHat10", yHat10, "error10", error10
    print '------------------------testing-------------------------'
    # predictions on a held-out slice, trained on the first 99 samples
    yHat01 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[100:199], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[100:199], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[100:199], yHat10.T)
    # the result shows the smaller kernel can overfit: test error grows
    print "error01:", error01
    print "error1:", error1
    print "error10", error10
def testStandRegres():
    """Fit ordinary least squares on ex0.txt, plot data + fitted line, print correlation (Python 2)."""
    xArr, yArr = re.loadDataSet('ex0.txt')
    ws = re.standRegres(xArr, yArr)
    xMat = mat(xArr)
    yMat = mat(yArr)
    fig = plt.figure()
    ax = fig.add_subplot(111)  # 1 row x 1 column, first (only) panel
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0], c='purple', label='realData', marker='.')  # scatter plot of samples
    # matrix[a:b, c:d]: rows a..b-1 and columns c..d-1 (half-open)
    # matrix[a, b]: single element at row a, column b
    xCopy = xMat.copy()
    # xCopy.sort(0)  # sort along axis 0 (left disabled in the original)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat, c='green')
    # print yHat.T.flatten().A[0].size
    # print yMat.flatten().A[0].size
    # print yHat
    # print yMat
    correlation = corrcoef(yHat.T, yMat)  # how well predictions track the targets
    print correlation
    plt.show()
def main():
    """Compare LWLR kernels k=0.1, 1, 10 on abalone data: training fit vs. held-out error (Python 2)."""
    abX, abY = reg.loadDataSet("abalone.txt")
    print "------------------------training----------------------"
    yHat01 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[0:99], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[0:99], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[0:99], yHat10.T)
    # the result shows the smaller kernel fits the training slice best
    print "yHat01:", yHat01, "error01:", error01
    print "yHat1", yHat1, "error1:", error1
    print "yHat10", yHat10, "error10", error10
    print "------------------------testing-------------------------"
    yHat01 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[100:199], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[100:199], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[100:199], yHat10.T)
    # the result shows the smaller kernel overfits on new data
    print "error01:", error01
    print "error1:", error1
    print "error10", error10
def main():
    """Smoke-test LWLR on ex0.txt: single-point estimates, whole-set fit, then plot (Python 2)."""
    xArr, yArr = reg.loadDataSet('ex0.txt')
    print yArr[0]
    # single-point estimates with wide and narrow kernels
    print lwlr(xArr[0], xArr, yArr, 1.0)
    print lwlr(xArr[0], xArr, yArr, 0.001)
    # fitted values for every sample point
    yHat = lwlrTest(xArr, xArr, yArr, 0.003)
    print "yHat:", yHat
    plot(xArr, yArr)
def testAbalone():
    """Fit standard regression on abalone data, then report LWLR errors on several
    test windows for kernels k=2 and k=10 (Python 2).

    NOTE(review): the loop/print nesting below is reconstructed from a
    whitespace-mangled source; `print ''` is assumed to be a per-k separator
    inside the loop — confirm against the original file.
    """
    xArr, yArr = re.loadDataSet('abalone.txt')
    ws = re.standRegres(xArr, yArr)
    print ws
    for k in [2, 10]:
        # train on samples 0..299, evaluate on successive windows
        calcErr(xArr, yArr, 0, 299, k, 300, 350, ws)
        calcErr(xArr, yArr, 0, 299, k, 350, 400, ws)
        calcErr(xArr, yArr, 0, 299, k, 400, 450, ws)
        calcErr(xArr, yArr, 0, 299, k, 450, 500, ws)
        calcErr(xArr, yArr, 0, 299, k, 500, 600, ws)
        print ''
def test1():
    """Fit ordinary least squares on ex0.txt and plot the samples with the fitted line."""
    data, labels = regression.loadDataSet('ex0.txt')
    weights = regression.standRegress(data, labels)
    featMat = np.mat(data)
    labelMat = np.mat(labels)
    figure = plt.figure()
    axes = figure.add_subplot(111)
    # scatter the raw samples (feature column 1 vs. target)
    axes.scatter(featMat[:, 1].flatten().A[0], labelMat.T[:, 0].flatten().A[0])
    sortedX = featMat.copy()
    sortedX.sort(0)  # sort so the line draws left-to-right
    fitted = sortedX * weights
    axes.plot(sortedX[:, 1], fitted)
    plt.show()
def test2():
    """Fit LWLR (k=0.01) on ex0.txt and plot the smoothed curve over the data."""
    dataArr, labelArr = regression.loadDataSet('ex0.txt')
    fitted = regression.lwlrTest(dataArr, dataArr, labelArr, 0.01)
    featMat = np.mat(dataArr)
    order = featMat[:, 1].argsort(0)          # indices that sort feature column 1
    orderedX = featMat[order][:, 0, :]        # squeeze the extra axis from fancy indexing
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.plot(orderedX[:, 1], fitted[order])
    axes.scatter(featMat[:, 1].flatten().A[0], np.mat(labelArr).T.flatten().A[0], s=2, c='red')
    plt.show()
def lwlrResult(fileName, weight):
    """Fit LWLR with kernel width `weight` on the data in `fileName` and plot the curve.

    Parameters:
        fileName -- path passed to regression.loadDataSet
        weight   -- Gaussian kernel width k for lwlrTest
    """
    xArr, yArr = regression.loadDataSet(fileName)
    yHat = regression.lwlrTest(xArr, xArr, yArr, weight)  # fitted value at every sample point
    # draw the regression curve
    xMat = mat(xArr)
    srtInd = xMat[:, 1].argsort(0)     # sort order of feature column 1
    xSort = xMat[srtInd][:, 0, :]      # squeeze extra axis introduced by fancy indexing
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xSort[:, 1], yHat[srtInd])
    ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
    plt.show()
def test1():
    """Print LWLR RSS errors (k=0.1, 1, 10) on the abalone training slice, then on a held-out slice."""
    dataX, dataY = regression.loadDataSet('abalone.txt')
    trainX, trainY = dataX[0:99], dataY[0:99]
    # training-slice errors, smallest kernel first
    for k in (0.1, 1, 10):
        fitted = regression.lwlrTest(trainX, trainX, trainY, k)
        print(regression.rssError(trainY, fitted.T))
    print('-------------------------------------------')
    # held-out-slice errors with the same training data
    for k in (0.1, 1, 10):
        fitted = regression.lwlrTest(dataX[100:199], trainX, trainY, k)
        print(regression.rssError(dataY[100:199], fitted.T))
def testLwlr():
    """Show LWLR fits for three kernel widths (1.0, 0.02, 0.002) in stacked subplots."""
    xArr, yArr = re.loadDataSet('ex0.txt')
    xMat = mat(xArr)
    yMat = mat(yArr)
    fig = plt.figure()
    ax = fig.add_subplot(311)  # 3 rows x 1 column, first panel
    privateShow(ax, xMat, yMat, 1.0)
    ax = fig.add_subplot(312)
    privateShow(ax, xMat, yMat, 0.02)
    ax = fig.add_subplot(313)
    privateShow(ax, xMat, yMat, 0.002)
    plt.show()
def lineResult(fileName):
    """Fit standard linear regression to `fileName` and plot the samples with the fitted line.

    Fix: the original computed `yHat = xMat * ws` before creating the figure and
    then immediately overwrote it with the sorted-x version; the dead first
    computation is removed. Plotted output is unchanged.

    Parameters:
        fileName -- path passed to regression.loadDataSet
    """
    xArr, yArr = regression.loadDataSet(fileName)
    ws = regression.standRegres(xArr, yArr)  # regression coefficient vector
    # draw the regression line over the scattered samples
    xMat = mat(xArr)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    xCopy = xMat.copy()
    xCopy.sort(0)  # sort so the line draws left-to-right
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
    plt.show()
def plotwMat():
    """Plot the ridge-regression coefficient matrix: one trace per feature
    as a function of log(lambda).

    Fix: the title and x-axis label misspelled the Greek letter as "lambada";
    corrected to "lambda". No other behavior changed.
    """
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    abX, abY = rg.loadDataSet('abalone.txt')
    redgeWeights = ridgeTest(abX, abY)  # 30 rows of coefficients, one per lambda
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(redgeWeights)
    ax_title_text = ax.set_title(u'log(lambda)与回归系数的关系', FontProperties=font)
    ax_xlabel_text = ax.set_xlabel(u'log(lambda)', FontProperties=font)
    ax_ylabel_text = ax.set_ylabel(u'回归系数', FontProperties=font)
    plt.setp(ax_title_text, size=20, weight='bold', color='red')
    plt.setp(ax_xlabel_text, size=10, weight='bold', color='black')
    plt.setp(ax_ylabel_text, size=10, weight='bold', color='black')
    plt.show()
def plotstageWiseMat():
    """
    Plot the forward-stagewise regression coefficient matrix: one trace per
    feature as a function of iteration number.

    NOTE(review): the original docstring said "ridge regression"; the body
    actually plots the output of stageWise().

    Website:
        http://www.cuijiahua.com/
    Modify:
        2017-11-20
    """
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    xArr, yArr = rg.loadDataSet('abalone.txt')
    returnMat = stageWise(xArr, yArr, 0.005, 1000)  # step 0.005, 1000 iterations
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(returnMat)
    ax_title_text = ax.set_title(u'前向逐步回归:迭代次数与回归系数的关系', FontProperties=font)
    ax_xlabel_text = ax.set_xlabel(u'迭代次数', FontProperties=font)
    ax_ylabel_text = ax.set_ylabel(u'回归系数', FontProperties=font)
    plt.setp(ax_title_text, size=15, weight='bold', color='red')
    plt.setp(ax_xlabel_text, size=10, weight='bold', color='black')
    plt.setp(ax_ylabel_text, size=10, weight='bold', color='black')
    plt.show()
def plotlwlrRegression():
    """
    Plot three locally-weighted linear regression curves (k=1.0, 0.01, 0.003)
    over the ex0.txt samples, one subplot each.

    Parameters:
        none
    Returns:
        none (shows a matplotlib figure)
    """
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    xArr, yArr = rg.loadDataSet('ex0.txt')            # load the data set
    yHat_1 = lwlrTest(xArr, xArr, yArr, 1.0)          # LWLR fit, wide kernel
    yHat_2 = lwlrTest(xArr, xArr, yArr, 0.01)         # LWLR fit, narrow kernel
    yHat_3 = lwlrTest(xArr, xArr, yArr, 0.003)        # LWLR fit, narrowest kernel
    xMat = np.mat(xArr)                               # feature matrix
    yMat = np.mat(yArr)                               # target matrix
    srtInd = xMat[:, 1].argsort(0)                    # sort order, so curves draw left-to-right
    xSort = xMat[srtInd][:, 0, :]
    fig, axs = plt.subplots(nrows=3, ncols=1, sharex=False, sharey=False, figsize=(10, 8))
    axs[0].plot(xSort[:, 1], yHat_1[srtInd], c='red')  # regression curve k=1.0
    axs[1].plot(xSort[:, 1], yHat_2[srtInd], c='red')  # regression curve k=0.01
    axs[2].plot(xSort[:, 1], yHat_3[srtInd], c='red')  # regression curve k=0.003
    axs[0].scatter(xMat[:, 1].flatten().A[0], yMat.flatten().A[0], s=20, c='blue', alpha=.5)  # samples
    axs[1].scatter(xMat[:, 1].flatten().A[0], yMat.flatten().A[0], s=20, c='blue', alpha=.5)  # samples
    axs[2].scatter(xMat[:, 1].flatten().A[0], yMat.flatten().A[0], s=20, c='blue', alpha=.5)  # samples
    # titles and axis labels
    axs0_title_text = axs[0].set_title(u'局部加权回归曲线,k=1.0', FontProperties=font)
    axs1_title_text = axs[1].set_title(u'局部加权回归曲线,k=0.01', FontProperties=font)
    axs2_title_text = axs[2].set_title(
        u'局部加权回归曲线,k=0.003', FontProperties=font)
    plt.setp(axs0_title_text, size=8, weight='bold', color='red')
    plt.setp(axs1_title_text, size=8, weight='bold', color='red')
    plt.setp(axs2_title_text, size=8, weight='bold', color='red')
    plt.xlabel('X')
    plt.show()
def abaloneTest():
    """
    Predict abalone age (Machine Learning in Action, example 8.3).

    Fix: the "new" (test-set) error printouts compared predictions on
    abX[100:199] against the *training* targets abY[0:99]; they now use the
    matching test targets abY[100:199], as the book's example does.

    INPUT: none
    OUTPUT: none (prints RSS error sizes)
    """
    # load data
    abX, abY = regression.loadDataSet("./data/abalone.txt")
    # predict on the training slice with three kernel widths
    oldyHat01 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    oldyHat1 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    oldyHat10 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    # errors between predictions and true values on the training slice
    print("old yHat01 error Size is :", regression.rssError(abY[0:99], oldyHat01.T))
    print("old yHat1 error Size is :", regression.rssError(abY[0:99], oldyHat1.T))
    print("old yHat10 error Size is :", regression.rssError(abY[0:99], oldyHat10.T))
    # errors between predictions and true values on the held-out slice
    newyHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    print("new yHat01 error Size is :", regression.rssError(abY[100:199], newyHat01.T))
    newyHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    print("new yHat1 error Size is :", regression.rssError(abY[100:199], newyHat1.T))
    newyHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    print("new yHat10 error Size is :", regression.rssError(abY[100:199], newyHat10.T))
    # compare against plain linear regression on the same split
    standWs = regression.standRegres(abX[0:99], abY[0:99])
    standyHat = mat(abX[100:199]) * standWs
    print("standRegress error Size is:", regression.rssError(abY[100:199], standyHat.T.A))
# !/usr/bin/env python # -*- coding:utf-8 -*- # @Time : 2018. # @Author : 绿色羽毛 # @Email : [email protected] # @Blog : https://blog.csdn.net/ViatorSun # @Note : 线性回归 import regression from numpy import * import matplotlib.pyplot as plt xArr , yArr = regression.loadDataSet("data.txt") ws = regression.standRegres(xArr,yArr) print(ws) #预测值yHat xMat = mat(xArr) yMat = mat(yArr) yHat = xMat*ws #绘制数据集散列点 fig = plt.figure() #创建子图 ax = fig.add_subplot(1,1,1) #添加一个(1,1,1)子图 x = xMat[:,1].flatten().A[0] y = yMat.T[:,0].flatten().A[0]
# print(yArr[0]) # print(regression.lwlr(xArr[0], xArr, yArr, 1.0)) # print(regression.lwlr(xArr[0], xArr, yArr, 0.001)) # # yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003) # xMat = mat(xArr) # srtInd = xMat[:, 1].argsort(0) # xSort = xMat[srtInd][:, 0, :] # # import matplotlib.pyplot as plt # fig = plt.figure() # ax = fig.add_subplot(111) # # ax.plot(xSort[:, 1], yHat[srtInd]) # ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red') # plt.show() # abX, abY = regression.loadDataSet('../data/abalone.txt') # ridgeWeights = regression.ridgeTest(abX, abY) # print(ridgeWeights) # # import matplotlib.pyplot as plt # fig = plt.figure() # ax = fig.add_subplot(111) # ax.plot(ridgeWeights) # plt.show() xArr, yArr = regression.loadDataSet('../data/abalone.txt') regression.stageWise(xArr, yArr, 0.01, 200)
# -*- coding:utf-8 -*- import regression from numpy import * xArr,yArr = regression.loadDataSet("abalone.txt") #stageWeight = regression.stageWise(xArr, yArr, 0.01, 200) #print (stageWeight) stageWeight = regression.stageWise(xArr, yArr, 0.0001, 50000) #print (stageWeight)
# coding: utf-8 # linear_regression/test_multiple.py import regression import numpy as np from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm import matplotlib.pyplot as plt import matplotlib.ticker as mtick if __name__ == "__main__": srcX, y = regression.loadDataSet('data/houses.txt') # 新建特征 m, n = srcX.shape X = regression.normalize(srcX.copy()) X = np.concatenate((np.ones((m, 1)), X), axis=1) rate = 1 maxLoop = 50 epsilon = 1 result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y) theta, errors, thetas = result print('theta is:') print(theta) print('........') # 预测价格 normalizedSize = (1650 - srcX[:, 0].mean(0)) / srcX[:, 0].std(0) normalizedBr = (3 - srcX[:, 1].mean(0)) / srcX[:, 1].std(0)
# -*- coding=utf-8 -*- import regression from numpy import * xArr,yArr = regression.loadDataSet("ex0.txt") #l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0) #l1 = regression.lwlr(xArr[0],xArr,yArr,0.001) #print ("l0 is %s" % l0) #print ("l1 is %s" % l1) yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0) print ("yHat is %s" % yHat) xMat = mat(xArr) #axis=0 按列排序;axis=1 按行排序 #返回xMat下标编号 srtInd = xMat[:,1].argsort(0) #print("srtInd is %s" % srtInd) xSort = xMat[srtInd][:,0,:] #这是什么意思????? #print ("xSort is %s"% xSort) import matplotlib.pyplot as plt
# coding: utf-8 # linear_regression/test_sgd.py import regression from matplotlib import cm from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt import matplotlib.ticker as mtick import numpy as np if __name__ == "__main__": X, y = regression.loadDataSet('data/ex1.txt'); m,n = X.shape X = np.concatenate((np.ones((m,1)), X), axis=1) rate = 0.01 maxLoop = 100 epsilon =0.01 result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y) theta, errors, thetas = result # 绘制拟合曲线 fittingFig = plt.figure() title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed) ax = fittingFig.add_subplot(111, title=title) trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:,0].flatten().A[0]) xCopy = X.copy() xCopy.sort(0)
def main():
    """Run forward stagewise regression (step 0.01, 200 iterations) on abalone data (Python 2)."""
    xArr, yArr = reg.loadDataSet('abalone.txt')
    rntMat = stageWise(xArr, yArr,0.01,200)
    print rntMat
# coding: utf-8 # linear_regression/test_lwr.py import regression import matplotlib.pyplot as plt import matplotlib.ticker as mtick import numpy as np if __name__ == "__main__": srcX, y = regression.loadDataSet('data/lwr.txt') m, n = srcX.shape srcX = np.concatenate((srcX[:, 0], np.power(srcX[:, 0], 2)), axis=1) # 特征缩放 X = regression.standardize(srcX.copy()) X = np.concatenate((np.ones((m, 1)), X), axis=1) rate = 0.1 maxLoop = 1000 epsilon = 0.01 predicateX = regression.standardize(np.matrix([[8, 64]])) predicateX = np.concatenate((np.ones((1, 1)), predicateX), axis=1) result, t = regression.lwr(rate, maxLoop, epsilon, X, y, predicateX, 1) theta, errors, thetas = result result2, t = regression.lwr(rate, maxLoop, epsilon, X, y, predicateX, 0.1) theta2, errors2, thetas2 = result2 # 打印特征点
# coding: utf-8 # linear_regression/test_temperature_polynomial.py import regression import matplotlib.pyplot as plt import matplotlib.ticker as mtick import numpy as np if __name__ == "__main__": srcX, y = regression.loadDataSet('temperature.txt') m, n = srcX.shape srcX = np.concatenate((srcX[:, 0], np.power(srcX[:, 0], 2)), axis=1) # 特征缩放 X = regression.standardize(srcX.copy()) X = np.concatenate((np.ones((m, 1)), X), axis=1) rate = 0.1 maxLoop = 1000 epsilon = 0.01 result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y) theta, errors, thetas = result # 打印特征点 fittingFig = plt.figure() title = 'polynomial with bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % ( rate, maxLoop, epsilon, timeConsumed) ax = fittingFig.add_subplot(111, title=title) trainingSet = ax.scatter(srcX[:, 1].flatten().A[0], y[:, 0].flatten().A[0])
# encoding=utf-8 import regression from numpy import * xArr, yArr = regression.loadDataSet("filename") ws = regression.standRegres(xArr, yArr) ''' # 绘图 xMat = mat(xArr) yMat = mat(yArr) yHat = xMat * ws import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0]) xCopy = xMat.copy() xCopy.sort(0) yHat = xCopy * ws ax.plot(xCopy[:, 1], yHat) plt.show() #相关系数:用来衡量预测值和真实值的匹配程序 corrcoef(yHat.T, yMat) ''' # 得到数据集中所有点的估计 yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
# # 复制,排序 # xCopy = xMat.copy() # xCopy.sort(0) # yHat = xCopy * ws # # plot画线 # ax.plot(xCopy[:,1], yHat) # # plt.show() # # #相关系数 # #print('corrcoef:');print(corrcoef(yHat.T,yMat)) #预测值和真实值得匹配程度 # #corrcoef: # # [[ 1. 0.13653777] # # [ 0.13653777 1. ]] # #end:Liner Regression xArr,yArr = regression.loadDataSet('ex0.txt') #对单点进行估计,输出预测值 print('xArr[0]:');print(xArr[0]) # xArr[0]: # [1.0, 0.067732] print(yArr[0]) #output: 3.176513 print(regression.lwlr(xArr[0],xArr,yArr,1.0)) #output: martix([[ 3.12204471]]) print(regression.lwlr(xArr[0],xArr,yArr,0.001)) #output: martix([[ 3.20175729]]) #为了得到数据集里所有点的估计,可以调用LwlrTest()函数: yHat = regression.lwlrTest(xArr,xArr,yArr,0.003) print('all points about yHat:'); print(yHat) #查看拟合效果 xMat = mat(xArr) #xArr是什么? srtInd = xMat[:,1].argsort(0) #对xArr排序
import regression
from numpy import *

# Run forward stagewise regression twice (coarse and fine), then compare
# against OLS on the same regularized data.
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
regression.stageWise(xArr, yArr, 0.001, 5000)

# reproduce the same preprocessing stageWise uses: regularized X, centered y
xMat = mat(xArr)
yMat = mat(yArr).T
xMat = regression.regularize(xMat)
yM = mean(yMat, 0)
yMat = yMat - yM
weights = regression.standRegres(xMat, yMat.T)
print(weights.T)
from regression import loadDataSet
from Multivariate import LinearRegresion
from ROOT import *
from array import array
from math import *

# Compare plain linear regression with locally-weighted linear regression
# on ex0.txt and draw both as ROOT TGraphs over the data.
#
# Fix: the Gaussian kernel was written exp(-(x-y)**(x-y)/(2*0.01**2));
# the exponent should be 2 (squared distance), giving the standard LWLR
# weight exp(-(x-y)^2 / (2*k^2)) with k = 0.01.

xs, ys = loadDataSet('ex0.txt')
x0, x1 = zip(*xs)
ndata = len(ys)
nregr = 100
dregr = 1.0/nregr

lr = LinearRegresion(xs,ys)
lwlr = LinearRegresion(xs,ys,lambda x,y: exp(-(x-y)**2/(2*0.01**2)), 0.0001 )

# evaluate both models on a regular grid over [0, 1)
xlr = [ (1.0,dregr*i) for i in range(nregr) ]
ylr = map( lr.GetValue, xlr )
xlwlr = xlr
ylwlr = map( lwlr.GetValue, xlwlr )

gdata = TGraph( ndata, array('f',x1) , array('f',ys) )
glr = TGraph( nregr, array('f',zip(*xlr)[1]) , array('f',ylr) )
glwlr = TGraph( nregr, array('f',zip(*xlwlr)[1]), array('f',ylwlr) )

gdata.SetMarkerStyle(20)
glr .SetLineWidth(2)
glwlr.SetLineWidth(2)
glr .SetLineColor(kRed)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sat May 12 06:19:52 2018 @author: kukuLife """ import numpy as np import regression X, y = regression.loadDataSet('ex1.txt') m, n = X.shape X = np.concatenate((np.ones((m, 1)), X), axis=1) maxLoop = 1500 epsilon = 0.01 rate = 0.02 theta, thetas, errors = regression.sgd(maxLoop, rate, X, y, epsilon)
__author__ = 'sunbeansoft'

import regression as reg
import matplotlib.pyplot as plt
from numpy import *

# LWLR single-point and all-point estimates on ex0.txt (Python 2);
# the earlier OLS experiment is kept disabled below.
xArr, yArr = reg.loadDataSet('ex0.txt')

# wx = reg.standRegres(xArr, yArr)
# print wx
#
# xMat = mat(xArr)
# yMat = mat(yArr)
#
# yHat = xMat * wx
# corrcoef(yHat.H, yMat)
#
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * wx
# ax.plot(xCopy[:, 1], yHat)
# plt.show()

print reg.lwlr(xArr[0], xArr, yArr, 1.0)
print reg.lwlr(xArr[0], xArr, yArr, 0.001)

yHat = reg.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # sort order of feature column 1
import numpy as np
import matplotlib as cm
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import regression as re

if __name__ == '__main__':
    # OLS on the Coursera "machine learning" week-2 data, with timing,
    # then plot the fitted line over the training scatter.
    X, y = re.loadDataSet("data/ex1.txt")  # coursera的《machine learning》week-2 data
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)  # prepend the bias column
    theta, timeConsumed = re.standRegres(X, y)
    print('消耗[%s] s \n 参数矩阵:\n %s' % (timeConsumed, theta))
    fittingFig = plt.figure()
    title = 'StandRegress time: %s' % timeConsumed
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])
    xCopy = X.copy()
    xCopy.sort(0)  # sort so the line draws left-to-right
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')
    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')
    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()
def main():
    """Run ridge regression over the abalone data and plot the coefficient paths."""
    featData, targetData = reg.loadDataSet('abalone.txt')
    coefPaths = ridgeTest(featData, targetData)
    plotFigure(coefPaths)
import regression
import matplotlib.pyplot as plt
from numpy import *

# Plot LWLR fits for three kernel widths (1, 0.01, 0.003) in stacked subplots.
xArr,yArr=regression.loadDataSet('ex0.txt')
#regression.lwlr(xArr[0],xArr,yArr,1.0)
#a = regression.lwlr(xArr[0],xArr,yArr,0.001)

yHat1 = regression.lwlrTest(xArr, xArr, yArr,1)
yHat2 = regression.lwlrTest(xArr, xArr, yArr,0.01)
yHat3 = regression.lwlrTest(xArr, xArr, yArr,0.003)
#print("yHat1 : %s" % (yHat1))

xMat=mat(xArr)
srtInd = xMat[:,1].argsort(0)  # sort order of feature column 1
#print("srtInd : %s" % (srtInd))
xSort=xMat[srtInd][:,0,:]      # squeeze extra axis from fancy indexing
#print("xSort : %s" % (xSort))

fig = plt.figure()
ax1 = fig.add_subplot(311)
ax1.plot(xSort[:,1],yHat1[srtInd])
ax1.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')
ax2 = fig.add_subplot(312)
ax2.plot(xSort[:,1],yHat2[srtInd])
ax2.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')
ax3 = fig.add_subplot(313)
ax3.plot(xSort[:,1],yHat3[srtInd])
ax3.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')
# This Python file uses the following encoding: utf-8 import os, sys import regression reload(regression) from numpy import * xArr,yArr=regression.loadDataSet('C:\Users\YAN\Desktop\\regression/ex0.txt') ''' #---------标准回归----------# print (xArr[0:2]) ws=regression.standRegres(xArr,yArr) print ws xMat=mat(xArr) yMat=mat(yArr) yHat=xMat*ws import matplotlib.pyplot as pl fig=pl.figure() ax=fig.add_subplot(111) ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0]) xCopy=xMat.copy(0)# sort along the first axis yHat=xCopy*ws ax.plot(xCopy[:,1],yHat) pl.show() ''' #print(corrcoef(yHat.T,yMat)) ''' print yArr[0] print(regression.lwlr(xArr[0],xArr,yArr,1.0)) print(regression.lwlr(xArr[0],xArr,yArr,0.001)) ''' ''' #----------局部回归--------#
import regression
from numpy import *

# Forward stagewise regression on the abalone data (Python 2); earlier
# LWLR / OLS / ridge experiments are kept disabled below.
xArray, yArray = regression.loadDataSet("./regression/abalone.txt")

# yHat01 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 0.1)
# yHat1 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 1)
# yHat10 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 10)
# print regression.rssError(yArray[100:199], yHat01)
# print regression.rssError(yArray[100:199], yHat1)
# print regression.rssError(yArray[100:199], yHat10)

# wsStand = regression.standRegres(xArray[0:99], yArray[0:99])
# yHat = mat(xArray[100:199]) * wsStand
# print regression.rssError(yArray[100:199], yHat.T.A)

# ridgeWeights = regression.ridgeTest(xArray, yArray)
# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(ridgeWeights)
# plt.show()

print regression.stageWise(xArray, yArray, 0.01, 200)
# coding: utf-8 # linear_regression/test_temperature_normal.py import regression as re from matplotlib import cm from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt import matplotlib.ticker as mtick import numpy as np if __name__ == '__main__': X, y = re.loadDataSet('data/temperature.txt'); m, n = X.shape X = np.concatenate((np.ones((m, 1)), X), axis=1) rate = 0.0001 maxLoop = 1000 epsilon = 0.01 result, timeConsumed = re.bgd(rate, maxLoop, epsilon, X, y) theta, errors, thetas = result # 绘制拟合曲线 fittingFig = plt.figure() title = 'bgd: rate=%.3f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (rate, maxLoop, epsilon, timeConsumed) ax = fittingFig.add_subplot(111, title=title) trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0]) xCopy = X.copy() xCopy.sort(0)
# -*- coding: utf-8 -*- """ Created on Fri May 12 16:07:29 2017 @author: 凯风 """ import regression from numpy import * from imp import reload import matplotlib.pyplot as plt reload(regression) xArr, yArr = regression.loadDataSet('ex0.txt') xArr[0:2] ws = regression.standRegres(xArr, yArr) # 求回归系数 ws xMat = mat(xArr) yMat = mat(yArr) yHat = xMat * ws # 拟合曲线 # 绘制拟合直线和散点图 fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0]) xCopy = xMat.copy() xCopy.sort(0) yHat = xCopy * ws ax.plot(xCopy[:, 1], yHat) plt.show()
# xArr, yArr = regression.loadDataSet('ex0.txt') # yHat = regression.lwlrTest(xArr,xArr,yArr,0.01) # print yHat # xMat = mat(xArr) # srtInd = xMat[:,1].argsort(0) # xSort = xMat[srtInd][:,0,:] # fig = plt.figure() # ax = fig.add_subplot(111) # ax.plot(xSort[:,1], yHat[srtInd]) # ax.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red') # plt.show() #################### baoyu nianling ############################# abX, abY = regression.loadDataSet('abalone.txt') # yHat01 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],0.1) # yHat02 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],1) # yHat03 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],10) # print regression.rssError(abY[0:99], yHat01.T) # print regression.rssError(abY[0:99], yHat02.T) # print regression.rssError(abY[0:99], yHat03.T) #print regression.ridgeRegres(abX, abY, 1) # ridgeWeights = regression.ridgeTest(abX, abY) # print ridgeWeights # fig = plt.figure()
# -*- coding=utf-8 -*- import regression from numpy import * xArr, yArr = regression.loadDataSet("ex0.txt") #l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0) #l1 = regression.lwlr(xArr[0],xArr,yArr,0.001) #print ("l0 is %s" % l0) #print ("l1 is %s" % l1) yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0) print("yHat is %s" % yHat) xMat = mat(xArr) #axis=0 按列排序;axis=1 按行排序 #返回xMat下标编号 srtInd = xMat[:, 1].argsort(0) #print("srtInd is %s" % srtInd) xSort = xMat[srtInd][:, 0, :] #这是什么意思????? #print ("xSort is %s"% xSort) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.plot(xSort[:, 1], mat(yHat[srtInd])) #这里有问题????? ax.scatter(mat(xArr)[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='blue')
# -*- coding:utf-8 -*- import regression from numpy import * abX,abY = regression.loadDataSet("abalone.txt") ridgeWeight = regression.ridgeTest(abX, abY) #print ("ridgeWeight is %s" % ridgeWeight) #展现回归系数与log(lam)的关系 #lam非常小时,与线性回归一致 #lam非常大时,系数全部缩减成0 import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.plot(ridgeWeight) plt.show()
# 预测鲍鱼的年龄 import regression def rssError(yArr, yHatArr): return ((yArr - yHatArr)**2).sum() if __name__ == '__main__': xArr, yArr = regression.loadDataSet('../data/abalone.txt') # yHat01 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 0.1) # yHat1 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 1) # yHat10 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 10) # # 为了分析预测误差的大小,可以用函数 rssError() 计算出这一指标 # print(rssError(yArr[0:99], yHat01.T)) # print(rssError(yArr[0:99], yHat1.T)) # print(rssError(yArr[0:99], yHat10.T)) # print('在新数据上的误差:') # yHat01 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 0.1) # yHat1 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 1) # yHat10 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 10) # print(rssError(yArr[100:199], yHat01.T)) # print(rssError(yArr[100:199], yHat1.T)) # print(rssError(yArr[100:199], yHat10.T)) # 使用岭回归的方式 ridgeWeights = regression.ridgeTest(xArr, yArr) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111)
import regression #xArr,yArr=regression.loadDataSet('ex0.txt') #regValue = regression.lwlr(xArr[0],xArr,yArr,1.0) #print(regValue) #regression.lwlr(xArr[0],xArr,yArr,0.001) xArr,yArr=regression.loadDataSet('abalone.txt') regression.stageWise(xArr,yArr,0.01,200)
# NOTE(review): the five lines below appear to be the tail of a
# ridgeRegress(xMat, yMat, lam) definition whose `def` line is missing from
# this chunk; they are reproduced unchanged and cannot stand alone.
denom = xTx + np.eye(np.shape(xMat)[1]) * lam
if np.linalg.det(denom) == 0.0:
    print("This matrix is singular, cannot do inverse")
    return
ws = denom.I * (xMat.T * yMat)
return ws


def ridgeTest(xArr, yArr):
    """Run ridge regression for 30 lambda values exp(i-10), i=0..29, on
    mean-centered y and mean/variance-normalized X; return the 30 x n
    coefficient matrix (one row per lambda)."""
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    yMean = np.mean(yMat, 0)
    yMat = yMat - yMean          # center the targets
    xMeans = np.mean(xMat, 0)
    xVar = np.var(xMat, 0)
    xMat = (xMat - xMeans) / xVar  # standardize the features
    numTestPts = 30
    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegress(xMat, yMat, np.exp(i - 10))  # lambda sweep on a log scale
        wMat[i, :] = ws.T
    return wMat


# script section: run the sweep on abalone data and plot coefficient paths
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = ridgeTest(abX, abY)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()
import regression
import matplotlib.pyplot as plt
from numpy import *

# OLS fit on a CSV data set (Python 2): print the correlation between
# predictions and targets, then plot the fitted line over the scatter.
xArr,yArr=regression.loadDataSet('/home/everton.gago/projetos/braskem/data/data.csv')
ws = regression.standRegres(xArr,yArr)

xMat=mat(xArr)
yMat=mat(yArr)
yHat = xMat*ws
corr = corrcoef(yHat.T, yMat)  # how well predictions track the targets
print 'Corr Coef: ', corr

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat.T[:,0].flatten().A[0])
xCopy=xMat.copy()
xCopy.sort(0)  # sort so the line draws left-to-right
yHat=xCopy*ws
ax.plot(xCopy[:,1],yHat)
plt.show()