Exemple #1
0
        # Convert the inputs to float matrices for the matrix arithmetic below.
        xMat = mat(xMat, dtype=float)
        yMat = mat(yMat, dtype=float)

        # Regularize every column except the first one, then apply the very
        # same means / standard deviations to the rows we want predictions
        # for, so both matrices live on the same scale.
        # NOTE(review): column 0 is presumably the constant bias term, which
        # is why it is left untouched -- confirm against the caller.
        xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
        xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans) / xStd

        preDictPerComponent = []
        # Column 0 of yMat is the computing result, column 1 the
        # communication result (per the original author's note).
        for yIdx in range(0, 2):
            yMatTmp = yMat[:, yIdx]

            # None means "no candidate k produced usable weights".  A None
            # sentinel (instead of the former 0) keeps a legitimate prediction
            # of exactly 0 from being reported as an error, and avoids the
            # ambiguous truth value of `matrix != 0` for multi-row results.
            taskPredict = None
            for kElement in bestKList:
                # Get the weights for the data we care about.
                # (Alternative: regression.standRegres(xMat, yMatTmp.T))
                wr = regression.lwlr(xCheckMat, xMat, yMatTmp.T, kElement)
                # Identity test: `wr != None` on a numpy matrix is an
                # elementwise comparison and cannot be used as a condition.
                if wr is not None:
                    taskPredict = xCheckMat * wr
                    break  # first k that works wins

            if taskPredict is not None:
                # reshape(-1).tolist() on a matrix yields a nested list;
                # flatten it so only plain values are collected.
                tmp = taskPredict.reshape(-1).tolist()
                tmp = [j for i in tmp for j in i]
                preDictPerComponent.extend(tmp)
            else:
                print("Error! Task Size: %d, Predict Hours: %d Task %d can not get predict value" % (tTSize, predictHours, yIdx))
        
Exemple #2
0
# Load the training data.  Every non-empty line is comma-separated: the first
# two fields go into the x rows (behind a leading constant 1) and the third
# field into the y rows.  The values stay strings here and are converted by
# mat(..., dtype=float) below.
xMat = []
yMat = []
# `with` guarantees the file handle is closed even if parsing fails.
with open("/home/yu/workspace/machine-learning-class/ex1/ex1data2.txt") as fr:
    for line in fr:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        tmpEle = line.split(',')
        xMat.append([1, tmpEle[0], tmpEle[1]])
        yMat.append([tmpEle[2]])

# Candidate values for the `k` argument of regression.lwlr, tried in order.
bestKList = [
    10
]  #[0.07, 0.3, 0.1, 0.7, 3, 10, 28, 40, 60, 80, 100]#[100, 80, 60, 40, 28, 10, 3, 0.7, 0.3, 0.07, 0.01]

# The single sample we want a prediction for (leading 1 = constant term).
xCheckMat = mat([1, 1650, 3], dtype=float)
xMat = mat(xMat, dtype=float)
yMat = mat(yMat, dtype=float)

# Regularize every column except the first, and apply the same means /
# standard deviations to the sample being predicted.
xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans) / xStd
for kElement in bestKList:
    taskPredict = 0
    # Get the weights for the data we care about.
    # (Alternative: regression.standRegres(xMat, yMat.T))
    wr = regression.lwlr(xCheckMat, xMat, yMat.T, kElement)
    # Identity test: `wr != None` on a numpy matrix is an elementwise
    # comparison and cannot be used as a condition.
    if wr is not None:
        print(wr)
        taskPredict = xCheckMat * wr
        print(taskPredict)
        break  # first k that yields weights wins
# # plt.show()
#
# # Correlation coefficient
# #print('corrcoef:');print(corrcoef(yHat.T,yMat))   # how well the predicted values match the true values
# #corrcoef:
# # [[ 1.          0.13653777]
# # [ 0.13653777  1.        ]]
# #end: Linear Regression

# Locally weighted linear regression demo on 'ex0.txt'.
# NOTE(review): `mat`, `plt` and `ws` are not defined in the visible lines;
# they must come from earlier code -- confirm.
xArr,yArr = regression.loadDataSet('ex0.txt')
# Estimate a single point and print the predicted value.
print('xArr[0]:');print(xArr[0])
#  xArr[0]:
# [1.0, 0.067732]
print(yArr[0])  #output: 3.176513
print(regression.lwlr(xArr[0],xArr,yArr,1.0))  #output:  matrix([[ 3.12204471]])
print(regression.lwlr(xArr[0],xArr,yArr,0.001))     #output:  matrix([[ 3.20175729]])

# To get estimates for every point in the data set, call lwlrTest():
yHat = regression.lwlrTest(xArr,xArr,yArr,0.003)
print('all points about yHat:');    print(yHat)

# Inspect the quality of the fit.
xMat = mat(xArr)   # matrix form of the x samples loaded above
srtInd = xMat[:,1].argsort(0) # row order that sorts the samples by column 1
# Indexing with the index matrix adds an extra axis; [:,0,:] removes it again.
xSort = xMat[srtInd][:,0,:]
# Plot: fitted curve over the sorted x values, raw samples as small red dots.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:,1],yHat[srtInd])
ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0],s=2,c='red')
yHat = xMat * ws

# Draw the scatter plot of the data set together with the best-fit line.
# NOTE(review): xMat, yMat, ws, xArr and yArr are expected to exist already;
# they are not defined in this block.
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])

# To draw the computed best-fit line we need the yHat values.
# If the points along the line are out of order the plot will be garbled,
# so the points are sorted in ascending order first.
xCopy = xMat.copy()
xCopy.sort(0)  # presumably numpy's in-place sort, i.e. sort along axis 0
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Estimate a single point (Python 2 print statements).
print yArr[0]
print regression.lwlr(xArr[0], xArr, yArr, 1.0)
print regression.lwlr(xArr[0], xArr, yArr, 0.001)

# Get the estimates for all points, then plot them against the raw samples.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
Exemple #5
0
# Scatter the samples and overlay the line given by the weights `ws`.
# NOTE(review): fig, xMat, yMat and ws are not defined in the visible lines;
# this snippet appears to start mid-script.
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)  # sort in place along axis 0 before drawing the line
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# Evaluate how well the model fits.
yHat = xMat * ws
corrcoef(yHat.T, yMat)  # look at positions [0,1] and [1,0]; the diagonal is always 1 (self-correlation)

# Test locally weighted linear regression.
reload(regression)  # Python 2 builtin
xArr, yArr = regression.loadDataSet('ex0.txt')
regression.lwlr(xArr[0], xArr, yArr, 1.0)  # single-point test
regression.lwlr(xArr[0], xArr, yArr, 0.001)
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.5)  # predict for xArr; try different k values to compare the results

# Plot the estimates together with the original values.
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()

# On real data.
# NOTE(review): yMat and ws are reused here without being recomputed after
# xMat was rebuilt above -- confirm this is intended.
reload(regression)
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

yHat = xMat * ws
print "corrcoef(yHat.T,yMat):", corrcoef(yHat.T, yMat)

# 8.2 Locally weighted linear regression
# NOTE(review): `homedir` is defined outside the visible lines.
xArr, yArr = regression.loadDataSet(homedir + 'ex0.txt')
# Print the first sample and its estimate for two different k values
# (Python 2 print statements).
print "yArr[0]:", yArr[0]
print "xArr[0]:", xArr[0]
print "regression.lwlr(xArr[0],xArr,yArr,1.0):", regression.lwlr(
    xArr[0], xArr, yArr, 1.0)
print "regression.lwlr(xArr[0],xArr,yArr,0.001):", regression.lwlr(
    xArr[0], xArr, yArr, 0.001)
print ":",
# Estimates for every sample, then plot them against the raw data.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.02)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]
# print "xMat",xMat
# print "srtInd:",srtInd
# print "xSort:",xSort
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
#
# import matplotlib.pyplot as plt
#
# fig=plt.figure()
# ax=fig.add_subplot(111)
#
# ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
#
# xCopy=xMat.copy();
# xCopy.sort(0)
# yHat=xCopy*ws
#
# ax.plot(xCopy[:,1],yHat)
# plt.show()

# Result of lwlr for the first sample with k = 1.0, stored under the name `ws`.
ws = regression.lwlr(xArr[0], xArr, yArr, 1.0)
print(ws)

# Estimates for every sample with k = 0.03.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.03)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]

import matplotlib.pyplot as plt

# Single-axes figure for the plot that follows.
fig = plt.figure()
ax = fig.add_subplot(111)

ax.scatter(xMat[:, 1].flatten().A[0],
           mat(yArr).T[:, 0].flatten().A[0],
           s=2,
Exemple #8
0
    # Two stacked plots: the line given by `ws` on top, the lwlrTest result below.
    # NOTE(review): xMat, yMat and ws come from code above the visible lines.
    fig = plt.figure()  # create the canvas
    ax = fig.add_subplot(211)
    # ax = fig.add_subplot(349): the argument 349 means the canvas is split into
    # 3 rows and 4 columns and the plot is drawn in the 9th cell, counting
    # left-to-right, top-to-bottom.
    # 3410 does not work; use the other form (3,4,10) instead.
    # ax = fig.add_subplot(2,1,1)
    # ax.plot(x,y)
    # ax = fig.add_subplot(2,2,3)
    # ax.plot(x,y)
    # plt.show()  displays multiple plots
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
    xCopy = xMat.copy()
    xCopy.sort(0)  # sort in place along axis 0 before drawing the line
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    print corrcoef(yHat.T, yMat)  # transpose so that both are row vectors

    # Second data set: estimates for every sample of 'ex0.txt' with k = 0.01.
    xArrL, yArrL = regression.loadDataSet('ex0.txt')
    yArrL[0]
    regression.lwlr(xArrL[0], xArrL, yArrL, 1.0)
    yHatL = regression.lwlrTest(xArrL, xArrL, yArrL, 0.01)

    xMatL = mat(xArrL)
    yMatL = mat(yArrL)
    srtInd = xMatL[:, 1].argsort(0)  # row order that sorts by column 1
    # Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
    xSort = xMatL[srtInd][:, 0, :]
    ax = fig.add_subplot(2, 1, 2)
    ax.scatter(xMatL[:, 1].flatten().A[0], yMatL.T[:, 0].flatten().A[0])
    ax.plot(xSort[:, 1], yHatL[srtInd])
    plt.show()
    print corrcoef(yHatL.T, yMatL)
Exemple #9
0
import regression
# Load the data set using loadDataSet's default argument(s).
xArr, yArr = regression.loadDataSet()
# print(xArr[0:2])
w = regression.standRegres(xArr, yArr)
# print(w)

# Compare the first target value with its lwlr estimate for k = 0.001.
print(yArr[0], regression.lwlr(xArr[0], xArr, yArr, 0.001))

yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
print(yHat)
# NOTE(review): `np` and `plt` are not imported in the visible lines.
xMat = np.mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts the samples by column 1
xSort = xMat[srtInd][:, 0, :]  # index-matrix lookup adds an axis; [:, 0, :] removes it

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
# NOTE(review): bare `mat` here versus `np.mat` above -- only works if `mat`
# is imported separately; confirm.
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
# NOTE(review): xCopy and ws are not defined in the visible lines; the
# statements below look like the tail of a different snippet.
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()

# NOTE(review): dm, ls and ws are used here before the lines below that
# (re)load dm and ls; they must come from earlier code -- confirm.
xMat = mat(dm)
yMat = mat(ls)
yHat = xMat * ws
corrcoef(yHat.T, yMat)  # compute the correlation coefficient

# Locally weighted linear regression
import regression
from numpy import *
dm, ls = regression.loadDataSet('ex0.txt')
ls[0]
regression.lwlr(dm[0], dm, ls, 1.0)
yHat = regression.lwlrTest(dm, dm, ls, 0.01)

# Plot the estimates (line) against the raw samples (small red dots).
xMat = mat(dm)
strInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[strInd][:, 0, :]
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[strInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(ls).T.flatten().A[0], s=2, c='red')
plt.show()

# Predict the age of abalone
import regression
from numpy import *
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'8.2'

__author__ = 'lxp'

import regression
import numpy as np
import matplotlib.pyplot as plt

# Locally weighted linear regression demo on 'ex0.txt'.
xArr, yArr = regression.loadDataSet('ex0.txt')
# Estimate for the first sample with two different k values.
print(regression.lwlr(xArr[0], xArr, yArr, 1))
print(regression.lwlr(xArr[0], xArr, yArr, 0.001))
# Estimates for every sample with k = 0.003.
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = np.mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts the samples by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]
# Plot the estimates as a line and the raw samples as small red dots.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0],
           np.mat(yArr).T.flatten().A[0],
           s=2,
           c='red')
plt.show()
Exemple #12
0
    # Incremental prediction: each sample i is estimated by lwlr using only
    # the samples that precede it, then appended to the training lists.
    xArr_origin, yArr_origin = regression.loadDataSet("foo.txt")
    # print(yArr_origin)
    xArr = []
    yArr = []
    m = (shape(xArr_origin))[0]  # number of samples
    yHat = zeros(m)

    # Seed the training lists with the first sample; its "prediction" is
    # simply its own target value.
    xArr.append(xArr_origin[0][:])
    yArr.append(yArr_origin[0])
    yHat[0] = yArr_origin[0]

    for i in range(1, m):
        # k = best_k(xArr, yArr)
        k = 0.06830078125  # fixed starting value for lwlr's k argument
        x = xArr_origin[i][:]
        y = regression.lwlr(x, xArr, yArr, k)
        # print(y.flatten().A[0][0])
        yHat[i] = y.flatten().A[0][0]  # extract the scalar from the result
        # Retry with a larger k until the estimate is positive.
        # NOTE(review): no upper bound on k -- this loops forever if the
        # estimate never becomes positive.
        while yHat[i] <= 0:
            k = k + 0.01
            y = regression.lwlr(x, xArr, yArr, k)
            yHat[i] = y.flatten().A[0][0]
        xArr.append(x)
        yArr.append(yArr_origin[i])
        print(i, k, yHat[i], yArr[i])
        # Append "i<TAB>k<TAB>yHat[i]" to 'workfile'.
        # NOTE(review): no newline is written in the visible lines; the
        # snippet may be cut off here.
        with open('workfile', 'a') as f:
            f.write(str(i))
            f.write("\t")
            f.write(str(k))
            f.write("\t")
            f.write(str(yHat[i]))
# Plot the samples with the line given by the weights `ws`.
# NOTE(review): xArr, yArr, ws, mat and corrcoef come from outside this block.
xMat, yMat = mat(xArr), mat(yArr)
yHat = xMat * ws
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat.T[:,0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)  # sort in place along axis 0 before drawing the line
yHat = xCopy * ws
ax.plot(xCopy[:,1], yHat)
plt.show()

# Correlation between the estimates and the targets (Python 2 print).
yHat = xMat * ws
print corrcoef(yHat.T, yMat)

# Locally weighted linear regression on 'ex0.txt'.
xArr, yArr = regression.loadDataSet('ex0.txt')
print yArr[0]
print regression.lwlr(xArr[0], xArr, yArr, 1.0)
print regression.lwlr(xArr[0], xArr, yArr, 0.001)
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.01)
# k = 1.0: matches standard regression, underfits
# k = 0.003: overfits
xMat = mat(xArr)
srtInd = xMat[:,1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:,0,:] removes it.
xSort = xMat[srtInd][:,0,:]
#import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:,1], yHat[srtInd])
ax.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show()
Exemple #14
0
# wx = reg.standRegres(xArr, yArr)
# print wx
#
# xMat = mat(xArr)
# yMat = mat(yArr)
#
# yHat = xMat * wx
# corrcoef(yHat.H, yMat)
#
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * wx
# ax.plot(xCopy[:, 1], yHat)
# plt.show()

# Estimate for the first sample with two different k values (Python 2 print).
# NOTE(review): reg, xArr, yArr, mat and plt are defined outside the visible lines.
print reg.lwlr(xArr[0], xArr, yArr, 1.0)
print reg.lwlr(xArr[0], xArr, yArr, 0.001)

# Estimates for every sample with k = 0.003, plotted against the raw data.
yHat = reg.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = mat(xArr)
srtInd = xMat[:, 1].argsort(0)  # row order that sorts by column 1
# Indexing with the index matrix adds an extra axis; [:, 0, :] removes it.
xSort = xMat[srtInd][:, 0, :]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], yHat[srtInd])
ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
plt.show();