Esempio n. 1
0
def best_k(xArr, yArr):
    """Search for a good LWLR kernel width k inside [0.01, 2.0].

    Evaluates the RSS error at the lower bound, midpoint and upper bound of
    a bracket, then repeatedly shrinks the bracket toward the best-scoring
    region until it is narrower than 0.01.  Returns the lower bound of the
    final bracket.
    """
    lo, hi = 0.01, 2.0
    mid = (hi + lo) / 2.0
    while True:
        # RSS error of each candidate width, scored on the training set itself.
        err_mid = regression.rssError(yArr, regression.lwlrTest(xArr, xArr, yArr, mid))
        err_lo = regression.rssError(yArr, regression.lwlrTest(xArr, xArr, yArr, lo))
        err_hi = regression.rssError(yArr, regression.lwlrTest(xArr, xArr, yArr, hi))
        if err_lo > err_mid and err_hi > err_mid:
            # Midpoint beats both ends: pull both ends halfway toward it.
            hi = mid + (hi - mid) / 2.0
            lo = lo + (mid - lo) / 2.0
        elif err_lo > err_mid and err_hi < err_mid:
            # Upper end is best: discard the lower half of the bracket.
            lo = mid
            mid = (hi + lo) / 2.0
        elif err_lo < err_mid and err_hi > err_mid:
            # Lower end is best: discard the upper half of the bracket.
            hi = mid
            mid = (hi + lo) / 2.0
        else:
            # Ambiguous (ties or both ends better): shrink symmetrically.
            hi = mid + (hi - mid) / 2.0
            lo = lo + (mid - lo) / 2.0
        if hi - lo < 0.01:
            return lo
Esempio n. 2
0
def privateShow(ax, xMat, yMat, k):
    """Scatter the raw samples on `ax` and overlay the LWLR fit for width k."""
    # Raw data points as purple dots.
    ax.scatter(xMat[:, 1].flatten().A[0],
               yMat.T[:, 0].flatten().A[0],
               c='purple',
               label='realData',
               marker='.')
    # Order rows by the second column so the curve is drawn left-to-right.
    order = xMat[:, 1].argsort(0)
    # Fancy indexing adds an axis; [:, 0, :] squeezes it back out.
    sortedX = xMat[order][:, 0, :]
    sortedY = yMat.T[order][:, 0, :].T
    fitted = re.lwlrTest(sortedX, sortedX.A, sortedY.A, k)
    ax.plot(sortedX[:, 1], fitted, c='green')
Esempio n. 3
0
def test2():
    """Fit LWLR (k=0.01) to ex0.txt and plot the curve over the raw points."""
    xArr, yArr = regression.loadDataSet('ex0.txt')
    yHat = regression.lwlrTest(xArr, xArr, yArr, 0.01)
    xMat = np.mat(xArr)
    # Sort by the feature column so the fitted line is drawn left-to-right.
    order = xMat[:, 1].argsort(0)
    sortedX = xMat[order][:, 0, :]
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.plot(sortedX[:, 1], yHat[order])
    axes.scatter(xMat[:, 1].flatten().A[0],
                 np.mat(yArr).T.flatten().A[0],
                 s=2,
                 c='red')
    plt.show()
Esempio n. 4
0
def lwlrResult(fileName, weight):
    """Run LWLR with kernel width `weight` over a data file and plot the fit."""
    xArr, yArr = regression.loadDataSet(fileName)
    # Predict every training point, using the training set as the query set.
    yHat = regression.lwlrTest(xArr, xArr, yArr, weight)
    xMat = mat(xArr)
    # Draw the regression curve in feature order.
    order = xMat[:, 1].argsort(0)
    sortedX = xMat[order][:, 0, :]
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.plot(sortedX[:, 1], yHat[order])
    axes.scatter(xMat[:, 1].flatten().A[0],
                 mat(yArr).T.flatten().A[0],
                 s=2,
                 c='red')
    plt.show()
Esempio n. 5
0
def test1():
    """Compare LWLR kernel widths on abalone data: training vs. unseen rows."""
    abX, abY = regression.loadDataSet('abalone.txt')
    # Errors when predicting the training slice itself.
    for width in (0.1, 1, 10):
        predicted = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], width)
        print(regression.rssError(abY[0:99], predicted.T))
    print('-------------------------------------------')
    # Errors on held-out rows 100-198 using the same training data.
    for width in (0.1, 1, 10):
        predicted = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], width)
        print(regression.rssError(abY[100:199], predicted.T))
def abaloneTest():
    """Predict abalone age (Machine Learning in Action, example 8.3).

    Compares LWLR with three kernel widths on the training slice
    (rows 0-98) and on a held-out slice (rows 100-198), then prints the
    error of an ordinary least-squares fit on the same split.

    INPUT: none.
    OUTPUT: none (prints RSS errors).
    """
    # Load the data.
    abX, abY = regression.loadDataSet("./data/abalone.txt")
    # Predict the training slice with three different kernel widths.
    oldyHat01 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    oldyHat1 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    oldyHat10 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    # Error of each kernel against the training labels.
    print("old yHat01 error Size is :",
          regression.rssError(abY[0:99], oldyHat01.T))
    print("old yHat1 error Size is :",
          regression.rssError(abY[0:99], oldyHat1.T))
    print("old yHat10 error Size is :",
          regression.rssError(abY[0:99], oldyHat10.T))
    # Error of each kernel on unseen rows 100-198.
    # BUG FIX: new-data predictions must be scored against the new-data
    # labels abY[100:199], not the training labels abY[0:99] (matches the
    # sibling test1() and the book's example).
    newyHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    print("new yHat01 error Size is :",
          regression.rssError(abY[100:199], newyHat01.T))
    newyHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    print("new yHat1 error Size is :",
          regression.rssError(abY[100:199], newyHat1.T))
    newyHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    print("new yHat10 error Size is :",
          regression.rssError(abY[100:199], newyHat10.T))
    # Baseline: plain linear regression trained on the same rows.
    standWs = regression.standRegres(abX[0:99], abY[0:99])
    standyHat = mat(abX[100:199]) * standWs
    print("standRegress error Size is:",
          regression.rssError(abY[100:199], standyHat.T.A))
Esempio n. 7
0
# NOTE(review): script fragment — ax, xCopy, yHat, xMat, yMat, ws, plt and
# regression are defined earlier in the original session, outside this snippet.
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Correlation between predicted and actual values.
yHat = xMat * ws
corrcoef(yHat.T, yMat)

# Test LWLR (locally weighted linear regression).
from importlib import reload

reload(regression)
xArr, yArr = regression.loadDataSet('ex0.txt')
yArr[0]
regression.lwlr(xArr[0], xArr, yArr, 1.0)
regression.lwlr(xArr[0], xArr, yArr, 0.001)
yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0)
# yHat = regression.lwlrTest(xArr, xArr, yArr, 0.01)
# yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
# Inspect how well yHat fits.
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]  # fancy indexing adds an axis; squeeze it out
# Plot the fitted curve over the raw points.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()

# Example: abalone age.
import regression
Esempio n. 8
0
import regression
import matplotlib.pyplot as plt
from numpy import *

# Load the example data set.
xArr,yArr=regression.loadDataSet('ex0.txt')

#regression.lwlr(xArr[0],xArr,yArr,1.0)
#a = regression.lwlr(xArr[0],xArr,yArr,0.001)
# Predict every point with three kernel widths.
yHat1 = regression.lwlrTest(xArr, xArr, yArr,1)
yHat2 = regression.lwlrTest(xArr, xArr, yArr,0.01)
yHat3 = regression.lwlrTest(xArr, xArr, yArr,0.003)
#print("yHat1 : %s" % (yHat1))

xMat=mat(xArr)
srtInd = xMat[:,1].argsort(0)
#print("srtInd : %s" % (srtInd))

xSort=xMat[srtInd][:,0,:]
#print("xSort : %s" % (xSort))

# Three stacked panels, one per kernel width.
fig = plt.figure()
ax1 = fig.add_subplot(311)
ax1.plot(xSort[:,1],yHat1[srtInd])
ax1.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')

ax2 = fig.add_subplot(312)
ax2.plot(xSort[:,1],yHat2[srtInd])
ax2.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')

ax3 = fig.add_subplot(313)
ax3.plot(xSort[:,1],yHat3[srtInd])
ax3.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')
# NOTE(review): `ws` is not defined in this snippet — presumably the weights
# from regression.standRegres earlier in the original script; confirm.
xMat, yMat = mat(xArr), mat(yArr)
yHat = xMat * ws
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat.T[:,0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:,1], yHat)
plt.show()

yHat = xMat * ws
# BUG FIX: Python 2 print statements converted to Python 3 print() calls
# to match the rest of the file.
print(corrcoef(yHat.T, yMat))

xArr, yArr = regression.loadDataSet('ex0.txt')
print(yArr[0])
print(regression.lwlr(xArr[0], xArr, yArr, 1.0))
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.01)
# k = 1.0 matches standard regression (underfit)
# k = 0.003 overfits
xMat = mat(xArr)
srtInd = xMat[:,1].argsort(0)
xSort = xMat[srtInd][:,0,:]
#import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:,1], yHat[srtInd])
ax.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
Esempio n. 10
0
# print(corrcoef(yHat.T, yMat))
'''
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])

xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()
'''

# LWLR with Gaussian kernel.
# k = 0.01 gives a close fit on this data set.
# NOTE(review): xArray, yArray and xMat are defined earlier in the original
# script, outside this snippet — confirm before running standalone.
yHat = regression.lwlrTest(xArray, xArray, yArray, 0.01)

# print(yHat)

# Sort by the feature column so the curve is drawn left-to-right.
sortInd = xMat[:, 1].argsort(0)
xSort = xMat[sortInd][:, 0, :]  # fancy indexing adds an axis; squeeze it out

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[sortInd])
ax.scatter(xMat[:, 1].flatten().A[0],
           mat(yArray).T.flatten().A[0],
           s=2,
           c='red')
plt.show()
Esempio n. 11
0
    fig = plt.figure()  # create the canvas
    ax = fig.add_subplot(211)
    #ax = fig.add_subplot(349) would split the canvas into 3 rows x 4 cols and
    # draw in the 9th cell (left-to-right, top-to-bottom); 3410 is invalid —
    # use the (3, 4, 10) call form instead.
    # ax = fig.add_subplot(2,1,1)
    # ax.plot(x,y)
    # ax = fig.add_subplot(2,2,3)
    # ax.plot(x,y)
    # plt.show()  shows all panels
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
    xCopy = xMat.copy()
    xCopy.sort(0)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    print corrcoef(yHat.T, yMat)  # transpose so both are row vectors (Python 2 print)

    # Second panel: LWLR fit on ex0.txt with k = 0.01.
    xArrL, yArrL = regression.loadDataSet('ex0.txt')
    yArrL[0]
    regression.lwlr(xArrL[0], xArrL, yArrL, 1.0)
    yHatL = regression.lwlrTest(xArrL, xArrL, yArrL, 0.01)

    xMatL = mat(xArrL)
    yMatL = mat(yArrL)
    srtInd = xMatL[:, 1].argsort(0)
    xSort = xMatL[srtInd][:, 0, :]
    ax = fig.add_subplot(2, 1, 2)
    ax.scatter(xMatL[:, 1].flatten().A[0], yMatL.T[:, 0].flatten().A[0])
    ax.plot(xSort[:, 1], yHatL[srtInd])
    plt.show()
    print corrcoef(yHatL.T, yMatL)
Esempio n. 12
0
xArr, yArr = regression.loadDataSet('abalone.txt')
'''
ws = regression.standRegres(xArr, yArr)
xMat = mat(xArr)
yMat = mat(yArr)
yHat = xMat*ws
'''
#Retry by lwlr to get best k

# Track the candidate k whose prediction/target correlation matrix has the
# smallest determinant (for a 2x2 correlation matrix det = 1 - r^2, so a
# smaller determinant means a stronger correlation).
corrcoefMin=100
bestK=1
keysets = [0.1,1,10,0.02,0.3]

for step in keysets:
    print(step)
    # Train on the first 4000 rows, predict the remainder.
    yHat = regression.lwlrTest(xArr[4000:],xArr[0:4000],yArr[0:4000],step)
    if(sum(yHat) != 0):  # an all-zero yHat presumably signals a failed fit — confirm in lwlrTest
        if(corrcoefMin >= linalg.det(corrcoef(yHat.T, yArr[4000:]))):
            corrcoefMin = linalg.det(corrcoef(yHat.T, yArr[4000:]))
            bestK=step
        print(regression.rssError(yArr[4000:], yHat.T))
print("=======================")
print(bestK)
print(corrcoefMin)
'''
fig = plt.figure()
ax = fig.add_subplot(111)

ax.scatter(xMat[:,1], yMat.T[:,0])
Esempio n. 13
0
# wx = reg.standRegres(xArr, yArr)
# print wx
#
# xMat = mat(xArr)
# yMat = mat(yArr)
#
# yHat = xMat * wx
# corrcoef(yHat.H, yMat)
#
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * wx
# ax.plot(xCopy[:, 1], yHat)
# plt.show()

# BUG FIX: Python 2 print statements converted to Python 3 print() calls;
# stray trailing semicolon removed.
print(reg.lwlr(xArr[0], xArr, yArr, 1.0))
print(reg.lwlr(xArr[0], xArr, yArr, 0.001))

# Fit every point with a small kernel (k = 0.003).
yHat = reg.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
Esempio n. 14
0
    # NOTE(review): truncated fragment — taskIdx, yMat, yTMat, xMat and xTMat
    # come from the enclosing (unseen) scope; uses Python 2 print statements.
    print "Start compute taskIdx %d:" % taskIdx

    for yIdx in range(
            0, 2):  #calculate computing result and then communication result
        bestKList = [
        ]  #we may meet singular case while we predict, so we save 10 good k and try 10 times. [lowestError, bestK]
        invalidKNum = 0
        invalidKMin = inf
        invalidKMax = 0.009

        yMatTmp = yMat[:, yIdx]
        yTMatTmp = yTMat[:, yIdx]

        # Sweep k from 5 down toward 0.1 looking for the best kernel width.
        for k in arange(5, 0.09, -0.1):  ##find best k
            yAssume = regression.lwlrTest(xTMat, xMat, yMatTmp.T, k)
            print k
            if yAssume.all() == 0:
                # All-zero prediction: record this k as invalid and move on.
                #print("%s  %d: regression.lwlr failed by k = %f." %(myDebug.file(), myDebug.line(), k))
                invalidKNum += 1
                if k > invalidKMax:
                    invalidKMax = k
                if k < invalidKMin:
                    invalidKMin = k
                continue
            #transfer Mat to list (flatten one level of nesting)
            yTList = yTMatTmp.reshape(-1).tolist()
            yTList = [j for i in yTList for j in i]
            rssE = regression.rssError(yTList, yAssume)
            if len(bestKList) == 0:
                bestKList.insert(0, [rssE, k])
Esempio n. 15
0
# -*- coding=utf-8 -*-
import regression
from numpy import *

xArr, yArr = regression.loadDataSet("ex0.txt")

#l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0)
#l1 = regression.lwlr(xArr[0],xArr,yArr,0.001)
#print ("l0 is %s" % l0)
#print ("l1 is %s" % l1)

# Predict every point with kernel width k = 1.0.
yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0)
print("yHat is %s" % yHat)

xMat = mat(xArr)
#axis=0 sorts down columns; axis=1 sorts across rows
#argsort returns the index order of xMat's second column
srtInd = xMat[:, 1].argsort(0)
#print("srtInd is %s"  % srtInd)

xSort = xMat[srtInd][:, 0, :]  # fancy indexing adds an axis; [:, 0, :] squeezes it out
#print ("xSort is %s"% xSort)

import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], mat(yHat[srtInd]))  # NOTE(review): original author flagged this line as suspect
ax.scatter(mat(xArr)[:, 1].flatten().A[0],
           mat(yArr).T.flatten().A[0],
           s=2,
           c='blue')
Esempio n. 16
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'8.3 mechinelearing in action'

__author__ = 'lxp'

import regression
import numpy as np

# Abalone-age example: compare LWLR kernel widths on training vs. unseen data.
abX, abY = regression.loadDataSet('abalone.txt')

# Errors on the training slice itself, one kernel width at a time.
for width in (0.1, 1, 10):
    predicted = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], width)
    print(regression.rssError(abY[0:99], predicted.T))

# Errors on held-out rows 100-198 using the same training rows.
for width in (0.1, 1, 10):
    predicted = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], width)
    print(regression.rssError(abY[100:199], predicted.T))

# Baseline: plain least-squares regression on the same split.
ws = regression.standRegres(abX[0:99], abY[0:99])
yHat = np.mat(abX[100:199]) * ws
print(regression.rssError(abY[100:199], yHat.T.A))
Esempio n. 17
0
# -*- coding=utf-8 -*-
import regression
from numpy import *

# Compare LWLR kernel widths on the abalone data set.
abX,abY = regression.loadDataSet("abalone.txt")
yHat01 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
yHat1 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1.0)
yHat10 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)

error01 = regression.rssError(abY[0:99], yHat01)
error1 = regression.rssError(abY[0:99], yHat1)
error10 = regression.rssError(abY[0:99], yHat10)

# Conclusion: a smaller kernel gives a lower error on the training data,
# but a smaller kernel overfits, so it may not predict new data as well.
print ("error01 is %s"  % error01)     #error01 is 56.7862596807
print ("error1 is %s"  % error1)         #error1 is 429.89056187
print ("error10 is %s"  % error10)     #error10 is 549.118170883

# Same kernels, scored on held-out rows 100-198.
yyHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
yyHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1.0)
yyHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
eerror01 = regression.rssError(abY[100:199], yyHat01)
eerror1 = regression.rssError(abY[100:199], yyHat1)
eerror10 = regression.rssError(abY[100:199], yyHat10)
print ("eerror01 is %s"  % eerror01)     #eerror01 is 33652.8973161
print ("eerror1 is %s"  % eerror1)         #eerror1 is 573.52614419
print ("eerror10 is %s"  % eerror10)     #eerror10 is 517.571190538       #on new data, k=10 does best


#compare with plain linear regression
Esempio n. 18
0
def calcErr(xArr, yArr, sIdx, eIdx, k, testStartIdx, testEndIdx, ws):
    """Print the LWLR test error for kernel width k beside the OLS baseline."""
    testX = xArr[testStartIdx:testEndIdx]
    testY = yArr[testStartIdx:testEndIdx]
    # LWLR prediction of the test slice from the [sIdx:eIdx] training slice.
    predicted = re.lwlrTest(testX, xArr[sIdx:eIdx], yArr[sIdx:eIdx], k)
    print('k=%f: %f' % (k, re.rssErr(testY, predicted)))
    # Ordinary least-squares prediction with the precomputed weights ws.
    baseline = (mat(testX) * ws).T
    print('标准: %f' % (re.rssErr(testY, baseline)))
#coding=utf-8
from numpy import *
import regression
"""
案例一:我们将回归用于真实数据
"""
if __name__ == '__main__':
    """#####################################################################################################################"""
    xArr, yArr = regression.loadDataSet(
        r'C:\Users\v_wangdehong\PycharmProjects\MachineLearning_V\Regression\data\abalone.txt'
    )
    # Test the algorithm on the first 99 rows with three kernel widths.
    yHat01 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 0.1)
    yHat1 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 1)
    yHat10 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 10)
    print(regression.rssError(yArr[0:99], yHat01))  #56.7842091184
    print(regression.rssError(yArr[0:99], yHat1))  #429.89056187
    print(regression.rssError(yArr[0:99], yHat10))  #549.118170883
    """
    从上面可以看到,使用较小的核将得到较低的误差,那么为什么不在所有数据集上都使用最小的核呢?
    因为使用最小的核将造成过拟合,对新数据不一定能达到最好的效果,下面就看看它在新数据上的表现
    """
    # Score the same kernels on held-out rows 100-198.
    yHat01 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 0.1)
    yHat1 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 1)
    yHat10 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 10)
    print(regression.rssError(yArr[100:199], yHat01))  # 25119.4591112
    print(regression.rssError(yArr[100:199], yHat1))  # 573.52614419
    print(regression.rssError(yArr[100:199], yHat10))  # 517.571190538
    """
    从上面结果可以看到,核大小等于10时测试误差最小,但是它在训练集上的误差却是最大的。
    接下来再和简单的线性回归做个比较。
Esempio n. 20
0
# NOTE(review): script fragment — xMat, yMat, ws, ax, reload, plt and
# regression are defined earlier in the original session, outside this snippet.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Judge model quality: look at the [0,1] and [1,0] entries of the correlation
# matrix — the diagonal (self-correlation) is always 1.
yHat = xMat * ws
corrcoef(yHat.T, yMat)

# Test locally weighted linear regression.
reload(regression)
xArr, yArr = regression.loadDataSet('ex0.txt')
regression.lwlr(xArr[0], xArr, yArr, 1.0)  # single-point test
regression.lwlr(xArr[0], xArr, yArr, 0.001)
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.5)  # predict xArr; vary k to compare fits

# Plot estimates against the raw values.
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()

# On real data (abalone).
reload(regression)
abX, abY = regression.loadDataSet('abalone.txt')
yHat01 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
Esempio n. 21
0
    # NOTE(review): fragment — xMat, xSort, srtInd, k, ws and regress come
    # from the enclosing (unseen) scope.
    #print("srtInd============")
    #print(srtInd)
    #print("xMat============")
    #print(xMat)
    #print("xSort===========")
    #print(xSort)
    yMat = mat(yArr)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    #print(xArr[:,1].flatten().A[0])
    #print(yArr.T[:,0].flatten().A[0])
    ax.scatter(xMat[:, 1].flatten().A[0],
               yMat.T[:, 0].flatten().A[0],
               s=2,
               c='red')
    xCopy = xMat.copy()
    #yHat=xCopy*ws
    yHat = regress.lwlrTest(xArr, xArr, yArr, float(k))
    #print("=============yMat=====================")
    #print(yHat)
    #print(yHat[srtInd])
    #yHat[srtInd] reorders yHat by the srtInd indices; yHat is a 1*n matrix
    #while yHat[srtInd] is an n*1 matrix.
    ax.plot(xSort[:, 1], yHat[srtInd])
    plt.savefig(str(k) + '.jpg')
    #print("==============")
    #print(yMat)
    #print(yHat.T)
    cor = corrcoef(yHat.T, yMat)
    print(cor)
#
# #相关系数
# #print('corrcoef:');print(corrcoef(yHat.T,yMat))   #预测值和真实值得匹配程度
# #corrcoef:
# # [[ 1.          0.13653777]
# # [ 0.13653777  1.        ]]
# #end:Liner Regression

xArr,yArr = regression.loadDataSet('ex0.txt')
#Estimate a single point and print its predicted value.
print('xArr[0]:');print(xArr[0])
#  xArr[0]:
# [1.0, 0.067732]
print(yArr[0])  #output: 3.176513
print(regression.lwlr(xArr[0],xArr,yArr,1.0))  #output:  matrix([[ 3.12204471]])
print(regression.lwlr(xArr[0],xArr,yArr,0.001))     #output:  matrix([[ 3.20175729]])

#To estimate every point in the data set, call lwlrTest():
yHat = regression.lwlrTest(xArr,xArr,yArr,0.003)
print('all points about yHat:');    print(yHat)

#Inspect the fit.
xMat = mat(xArr)   # xArr as a numpy matrix
srtInd = xMat[:,1].argsort(0) # index order of the feature column
xSort = xMat[srtInd][:,0,:]  # fancy indexing adds an axis; squeeze it out
#Plot:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:,1],yHat[srtInd])
ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0],s=2,c='red')
plt.show()
Esempio n. 23
0
# -*- coding=utf-8 -*-
import regression
from numpy import *

xArr,yArr = regression.loadDataSet("ex0.txt")

#l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0)
#l1 = regression.lwlr(xArr[0],xArr,yArr,0.001)
#print ("l0 is %s" % l0)
#print ("l1 is %s" % l1)



# Predict every point with kernel width k = 1.0.
yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0)
print ("yHat is %s"  % yHat)

xMat = mat(xArr)
#axis=0 sorts down columns; axis=1 sorts across rows
#argsort returns the index order of xMat's second column
srtInd = xMat[:,1].argsort(0)
#print("srtInd is %s"  % srtInd)




xSort = xMat[srtInd][:,0,:]    # fancy indexing adds an axis; [:,0,:] squeezes it out
#print ("xSort is %s"% xSort)



import matplotlib.pyplot as plt