예제 #1
0
def testGaussWeight():
    """Plot the samples and the Gaussian kernel weights for three bandwidths.

    Loads 'ex0.txt' through ``re.loadDataSet`` and draws a four-row figure:
    the raw samples on top, followed by the weights returned by
    ``re.getGaussWeightArr`` for the query point ``[1, 0.5]`` with
    k = 0.5, 0.1 and 0.01. Ends with ``plt.show()``.
    """
    features, targets = re.loadDataSet('ex0.txt')
    xMat = mat(features)
    yMat = mat(targets)

    fig = plt.figure()
    # 411 = grid of 4 rows x 1 column, first (top) cell.
    ax = fig.add_subplot(411)
    plt.yticks(linspace(2.5, 5, 6))
    sample_x = xMat[:, 1].flatten().A[0]
    sample_y = yMat.T[:, 0].flatten().A[0]
    ax.scatter(sample_x, sample_y, c='purple', label='realData', marker='.')

    # In-place sort along axis 0 (each column independently), presumably so
    # that the weight curves below are drawn from left to right.
    xMat.sort(0)

    # (subplot position, bandwidth k, y-tick spec, caption y, caption, plot kwargs)
    panels = (
        (412, 0.5, (0.6, 1, 9), 0.75, r'$k=0.5$', {}),
        (413, 0.1, (0, 1, 6), 0.5, r'$k=0.1$', {}),
        (414, 0.01, (0, 1, 6), 0.5, r'$k=0.01$', {'label': 'k=0.01'}),
    )
    for position, k, ticks, caption_y, caption, plot_kwargs in panels:
        weights = re.getGaussWeightArr([1, 0.5], xMat, k)
        ax = fig.add_subplot(position)
        plt.yticks(linspace(*ticks))
        ax.plot(xMat[:, 1], weights, **plot_kwargs)
        plt.text(0.7, caption_y, caption)
    plt.show()
예제 #2
0
def main():
    """Compare locally weighted regression errors for k = 0.1, 1 and 10.

    Loads 'abalone.txt' via ``reg.loadDataSet`` and predicts with
    ``lwlr.lwlrTest`` using samples 0..98 as the training data. Prints the
    predictions and ``rssErr`` for those same samples, then prints
    ``rssErr`` for the held-out samples 100..198.
    """
    abX, abY = reg.loadDataSet('abalone.txt')
    # Every print below receives exactly one argument, so it emits the same
    # text as the former Python 2 print statement and is valid on Python 3.
    print('------------------------training----------------------')
    yHat01 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[0:99], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[0:99], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[0:99], yHat10.T)
    # Predictions and error on the training samples, one line per kernel width.
    print("yHat01: %s error01: %s" % (yHat01, error01))
    print("yHat1 %s error1: %s" % (yHat1, error1))
    print("yHat10 %s error10 %s" % (yHat10, error10))

    print('------------------------testing-------------------------')
    yHat01 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[100:199], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[100:199], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[100:199], yHat10.T)
    # Error on the held-out samples, one line per kernel width.
    print("error01: %s" % (error01,))
    print("error1: %s" % (error1,))
    print("error10 %s" % (error10,))
예제 #3
0
def testStandRegres():
    """Fit ``re.standRegres`` on 'ex0.txt', plot the fit and print the correlation.

    Scatters the samples (column 1 of the feature matrix against the
    targets), overlays the line ``x * ws`` in green, prints
    ``corrcoef(yHat.T, yMat)`` and shows the figure.
    """
    xArr, yArr = re.loadDataSet('ex0.txt')
    ws = re.standRegres(xArr, yArr)
    xMat = mat(xArr)
    yMat = mat(yArr)
    fig = plt.figure()
    # 111 = one subplot that fills the whole figure.
    ax = fig.add_subplot(111)
    # Scatter plot of the samples.
    # matrix[a:b, c:d] selects rows a..b-1 and columns c..d-1 (half-open);
    # matrix[a, b] is the element at row a, column b.
    ax.scatter(xMat[:, 1].flatten().A[0],
               yMat.T[:, 0].flatten().A[0],
               c='purple',
               label='realData',
               marker='.')
    # The copy is deliberately left unsorted: yHat has to stay in the same
    # row order as yMat for the correlation computed below.
    xCopy = xMat.copy()
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat, c='green')
    correlation = corrcoef(yHat.T, yMat)
    # Single-argument call: same output on Python 2, valid on Python 3.
    print(correlation)
    plt.show()
예제 #4
0
def main():
    """Report locally weighted regression errors on 'abalone.txt'.

    Uses samples 0..98 as training data for ``lwlr.lwlrTest`` with kernel
    widths 0.1, 1 and 10. The first section prints predictions and
    ``rssErr`` on the training samples; the second prints ``rssErr`` on the
    held-out samples 100..198.
    """
    abX, abY = reg.loadDataSet("abalone.txt")
    # All prints take one pre-formatted argument: identical text to the old
    # Python 2 print statements, and no SyntaxError on Python 3.
    print("------------------------training----------------------")
    yHat01 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[0:99], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[0:99], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[0:99], yHat10.T)
    # Training-set predictions and errors per kernel width.
    print("yHat01: %s error01: %s" % (yHat01, error01))
    print("yHat1 %s error1: %s" % (yHat1, error1))
    print("yHat10 %s error10 %s" % (yHat10, error10))

    print("------------------------testing-------------------------")
    yHat01 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    error01 = rssErr(abY[100:199], yHat01.T)
    yHat1 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    error1 = rssErr(abY[100:199], yHat1.T)
    yHat10 = lwlr.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    error10 = rssErr(abY[100:199], yHat10.T)
    # Held-out errors per kernel width.
    print("error01: %s" % (error01,))
    print("error1: %s" % (error1,))
    print("error10 %s" % (error10,))
예제 #5
0
def main():
    """Print single-point and whole-set ``lwlr`` results for 'ex0.txt', then plot.

    Prints the first target value, the ``lwlr`` result for the first sample
    with k = 1.0 and k = 0.001, and the ``lwlrTest`` result for every sample
    with k = 0.003, then calls ``plot(xArr, yArr)``.
    """
    xArr, yArr = reg.loadDataSet('ex0.txt')
    # Single-argument print calls: same output as the Python 2 print
    # statements they replace, and valid on Python 3.
    print(yArr[0])
    print(lwlr(xArr[0], xArr, yArr, 1.0))
    print(lwlr(xArr[0], xArr, yArr, 0.001))

    yHat = lwlrTest(xArr, xArr, yArr, 0.003)
    print("yHat: %s" % (yHat,))

    plot(xArr, yArr)
예제 #6
0
def testAbalone():
    """Run ``calcErr`` on 'abalone.txt' for k = 2 and k = 10 over five ranges.

    Fits ``re.standRegres`` on the full data set, prints the weights, and
    for each k calls ``calcErr`` with the fixed range (0, 299) and each of
    the ranges 300-350, 350-400, 400-450, 450-500 and 500-600, followed by
    an empty line.
    """
    xArr, yArr = re.loadDataSet('abalone.txt')
    ws = re.standRegres(xArr, yArr)
    # Single-argument print: same output on Python 2, valid on Python 3.
    print(ws)
    # NOTE(review): presumably (0, 299) is the training range and these are
    # the evaluation ranges -- confirm against calcErr.
    eval_ranges = ((300, 350), (350, 400), (400, 450), (450, 500), (500, 600))
    for k in [2, 10]:
        for start, stop in eval_ranges:
            calcErr(xArr, yArr, 0, 299, k, start, stop, ws)
        print('')
예제 #7
0
def test1():
    """Scatter the 'ex0.txt' samples and overlay the ``standRegress`` fit."""
    data, labels = regression.loadDataSet('ex0.txt')
    weights = regression.standRegress(data, labels)
    xMat = np.mat(data)
    yMat = np.mat(labels)

    axes = plt.figure().add_subplot(111)
    sample_x = xMat[:, 1].flatten().A[0]
    sample_y = yMat.T[:, 0].flatten().A[0]
    axes.scatter(sample_x, sample_y)

    # Work on a copy sorted along axis 0 so the line is drawn left to right
    # without reordering the scattered samples.
    ordered = xMat.copy()
    ordered.sort(0)
    fitted = ordered * weights
    axes.plot(ordered[:, 1], fitted)
    plt.show()
예제 #8
0
def test2():
    """Plot ``lwlrTest`` predictions (k = 0.01) over the 'ex0.txt' samples."""
    data, labels = regression.loadDataSet('ex0.txt')
    predictions = regression.lwlrTest(data, data, labels, 0.01)
    xMat = np.mat(data)

    # Row order that sorts the samples by feature column 1.
    order = xMat[:, 1].argsort(0)
    # Indexing with the index column adds an axis; [:, 0, :] drops it again,
    # leaving the rows of xMat in sorted order.
    xSorted = xMat[order][:, 0, :]

    axes = plt.figure().add_subplot(111)
    axes.plot(xSorted[:, 1], predictions[order])
    sample_x = xMat[:, 1].flatten().A[0]
    sample_y = np.mat(labels).T.flatten().A[0]
    axes.scatter(sample_x, sample_y, s=2, c='red')
    plt.show()
예제 #9
0
def lwlrResult(fileName, weight):
    """Plot the ``lwlrTest`` result for a data file over its samples.

    Args:
        fileName: path handed to ``regression.loadDataSet``.
        weight: value passed as the last argument of ``regression.lwlrTest``
            (presumably the kernel width k -- confirm in regression.py).
    """
    data, labels = regression.loadDataSet(fileName)
    # One lwlrTest result per sample, evaluated on the training samples.
    predictions = regression.lwlrTest(data, data, labels, weight)

    # Draw the regression curve in order of increasing feature column 1.
    xMat = mat(data)
    order = xMat[:, 1].argsort(0)
    # [:, 0, :] removes the extra axis introduced by indexing with `order`.
    xSorted = xMat[order][:, 0, :]

    axes = plt.figure().add_subplot(111)
    axes.plot(xSorted[:, 1], predictions[order])
    sample_x = xMat[:, 1].flatten().A[0]
    sample_y = mat(labels).T.flatten().A[0]
    axes.scatter(sample_x, sample_y, s=2, c='red')
    plt.show()
예제 #10
0
파일: main.py 프로젝트: thatwaylw/pycl
def test1():
    """Print ``rssError`` of ``lwlrTest`` on 'abalone.txt' for k = 0.1, 1, 10.

    The first three lines are errors on the training samples 0..98; after
    the separator come the errors on the held-out samples 100..198.
    """
    abX, abY = regression.loadDataSet('abalone.txt')
    kernel_widths = (0.1, 1, 10)

    # All predictions are computed before anything is printed.
    fitted = [regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], k)
              for k in kernel_widths]
    for yHat in fitted:
        print(regression.rssError(abY[0:99], yHat.T))

    print('-------------------------------------------')

    held_out = [regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], k)
                for k in kernel_widths]
    for yHat in held_out:
        print(regression.rssError(abY[100:199], yHat.T))
예제 #11
0
def testLwlr():
    """Draw three stacked ``privateShow`` panels for k = 1.0, 0.02 and 0.002."""
    features, targets = re.loadDataSet('ex0.txt')
    xMat = mat(features)
    yMat = mat(targets)

    fig = plt.figure()
    # 31x = grid of 3 rows x 1 column; the last digit selects the cell.
    for position, k in ((311, 1.0), (312, 0.02), (313, 0.002)):
        panel = fig.add_subplot(position)
        privateShow(panel, xMat, yMat, k)
    plt.show()
예제 #12
0
def lineResult(fileName):
    """Plot the ``standRegres`` line for a data file over its samples.

    Args:
        fileName: path handed to ``regression.loadDataSet``.
    """
    xArr, yArr = regression.loadDataSet(fileName)
    ws = regression.standRegres(xArr, yArr)  # regression weights

    # Draw the regression line. The earlier unsorted `xMat * ws` product was
    # overwritten before use and has been removed.
    xMat = mat(xArr)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Sort a copy along axis 0 so the line is drawn left to right while the
    # scatter below still uses the original row order.
    xCopy = xMat.copy()
    xCopy.sort(0)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    ax.scatter(xMat[:, 1].flatten().A[0],
               mat(yArr).T.flatten().A[0],
               s=2,
               c='red')
    plt.show()
예제 #13
0
def plotwMat():
    """
    Plot the ridge regression coefficient matrix.

    Loads 'abalone.txt', runs ``ridgeTest`` and plots every column of the
    returned matrix against its row index.
    """
    # NOTE: Windows-only font path; presumably chosen so the Chinese labels
    # render correctly.
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    abX, abY = rg.loadDataSet('abalone.txt')
    redgeWeights = ridgeTest(abX, abY)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(redgeWeights)
    # The Text property is spelled `fontproperties`; the capitalised
    # `FontProperties` only worked while Matplotlib matched property names
    # case-insensitively and is rejected by current releases.
    ax_title_text = ax.set_title(u'log(lambada)与回归系数的关系', fontproperties=font)
    ax_xlabel_text = ax.set_xlabel(u'log(lambada)', fontproperties=font)
    ax_ylabel_text = ax.set_ylabel(u'回归系数', fontproperties=font)
    plt.setp(ax_title_text, size=20, weight='bold', color='red')
    plt.setp(ax_xlabel_text, size=10, weight='bold', color='black')
    plt.setp(ax_ylabel_text, size=10, weight='bold', color='black')
    plt.show()
예제 #14
0
def plotstageWiseMat():
    """
    Plot the coefficient matrix returned by forward stagewise regression.

    Loads 'abalone.txt', runs ``stageWise(xArr, yArr, 0.005, 1000)`` and
    plots every column of the returned matrix against its row index.

    Website:
        http://www.cuijiahua.com/
    Modify:
        2017-11-20
    """
    # NOTE: Windows-only font path; presumably chosen so the Chinese labels
    # render correctly.
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    xArr, yArr = rg.loadDataSet('abalone.txt')
    returnMat = stageWise(xArr, yArr, 0.005, 1000)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(returnMat)
    # Use the lower-case `fontproperties` keyword: current Matplotlib no
    # longer accepts the capitalised `FontProperties` spelling.
    ax_title_text = ax.set_title(u'前向逐步回归:迭代次数与回归系数的关系', fontproperties=font)
    ax_xlabel_text = ax.set_xlabel(u'迭代次数', fontproperties=font)
    ax_ylabel_text = ax.set_ylabel(u'回归系数', fontproperties=font)
    plt.setp(ax_title_text, size=15, weight='bold', color='red')
    plt.setp(ax_xlabel_text, size=10, weight='bold', color='black')
    plt.setp(ax_ylabel_text, size=10, weight='bold', color='black')
    plt.show()
예제 #15
0
def plotlwlrRegression():
    """
    Plot locally weighted regression curves for three kernel widths.

    Loads 'ex0.txt', evaluates ``lwlrTest`` on every sample for k = 1.0,
    0.01 and 0.003, and draws one subplot per k with the resulting curve
    (red) over the samples (blue).

    Parameters:
        None
    Returns:
        None
    """
    # NOTE: Windows-only font path; presumably chosen so the Chinese titles
    # render correctly.
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    xArr, yArr = rg.loadDataSet('ex0.txt')  # load the data set
    yHat_1 = lwlrTest(xArr, xArr, yArr, 1.0)
    yHat_2 = lwlrTest(xArr, xArr, yArr, 0.01)
    yHat_3 = lwlrTest(xArr, xArr, yArr, 0.003)
    xMat = np.mat(xArr)
    yMat = np.mat(yArr)
    srtInd = xMat[:, 1].argsort(0)  # row order sorted by feature column 1
    # [:, 0, :] removes the extra axis introduced by indexing with srtInd.
    xSort = xMat[srtInd][:, 0, :]
    fig, axs = plt.subplots(nrows=3, ncols=1, sharex=False,
                            sharey=False, figsize=(10, 8))
    sample_x = xMat[:, 1].flatten().A[0]
    sample_y = yMat.flatten().A[0]
    panels = (
        (yHat_1, u'局部加权回归曲线,k=1.0'),
        (yHat_2, u'局部加权回归曲线,k=0.01'),
        (yHat_3, u'局部加权回归曲线,k=0.003'),
    )
    for ax, (yHat, title) in zip(axs, panels):
        ax.plot(xSort[:, 1], yHat[srtInd], c='red')  # regression curve
        ax.scatter(sample_x, sample_y, s=20, c='blue', alpha=.5)  # samples
        # Use the lower-case `fontproperties` keyword: current Matplotlib no
        # longer accepts the capitalised `FontProperties` spelling.
        title_text = ax.set_title(title, fontproperties=font)
        plt.setp(title_text, size=8, weight='bold', color='red')
    # plt.xlabel acts on the current axes, i.e. the last subplot created.
    plt.xlabel('X')
    plt.show()
def abaloneTest():
    """Predict abalone age and compare errors for several kernel widths.

    Follows example 8.3 of "Machine Learning in Action": fits
    ``regression.lwlrTest`` on samples 0..98 of './data/abalone.txt' with
    k = 0.1, 1 and 10, prints ``regression.rssError`` on the training
    samples and on the held-out samples 100..198, and finally prints the
    held-out error of plain ``regression.standRegres`` for comparison.

    INPUT:
        None
    OUTPUT:
        None
    """
    # Load the data.
    abX, abY = regression.loadDataSet("./data/abalone.txt")
    # Predict the training samples with different kernel widths.
    oldyHat01 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)
    oldyHat1 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
    oldyHat10 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
    # Error between each prediction and the true training targets.
    print("old yHat01 error Size is :",
          regression.rssError(abY[0:99], oldyHat01.T))
    print("old yHat1 error Size is :",
          regression.rssError(abY[0:99], oldyHat1.T))
    print("old yHat10 error Size is :",
          regression.rssError(abY[0:99], oldyHat10.T))
    # Error on the new (test) samples. The predictions are for samples
    # 100..198, so they must be scored against abY[100:199]; the previous
    # code compared them with the training targets abY[0:99].
    newyHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
    print("new yHat01 error Size is :",
          regression.rssError(abY[100:199], newyHat01.T))
    newyHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
    print("new yHat1 error Size is :",
          regression.rssError(abY[100:199], newyHat1.T))
    newyHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
    print("new yHat10 error Size is :",
          regression.rssError(abY[100:199], newyHat10.T))
    # Plain linear regression on the same split, for comparison.
    standWs = regression.standRegres(abX[0:99], abY[0:99])
    standyHat = mat(abX[100:199]) * standWs
    print("standRegress error Size is:",
          regression.rssError(abY[100:199], standyHat.T.A))
예제 #17
0
#  !/usr/bin/env  python
#  -*- coding:utf-8 -*-
# @Time   :  2018.
# @Author :  绿色羽毛
# @Email  :  [email protected]
# @Blog   :  https://blog.csdn.net/ViatorSun
# @Note   :  线性回归


import regression
from numpy import *
import matplotlib.pyplot as plt

# Load the samples and compute the regression weights with the project's
# regression module.
xArr , yArr = regression.loadDataSet("data.txt")

ws = regression.standRegres(xArr,yArr)


print(ws)

# Predicted values yHat
xMat = mat(xArr)
yMat = mat(yArr)
yHat = xMat*ws

# Prepare the scatter plot of the data set
fig = plt.figure()             # create the figure
ax = fig.add_subplot(1,1,1)    # add a single (1,1,1) subplot
# Flattened 1-D arrays: feature column 1 and the target values.
x = xMat[:,1].flatten().A[0]
y = yMat.T[:,0].flatten().A[0]
예제 #18
0
# print(yArr[0])
# print(regression.lwlr(xArr[0], xArr, yArr, 1.0))
# print(regression.lwlr(xArr[0], xArr, yArr, 0.001))
# 
# yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
# xMat = mat(xArr)
# srtInd = xMat[:, 1].argsort(0)
# xSort = xMat[srtInd][:, 0, :]
# 
# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# # ax.plot(xSort[:, 1], yHat[srtInd])
# ax.scatter(xMat[:, 1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
# plt.show()


# abX, abY = regression.loadDataSet('../data/abalone.txt')
# ridgeWeights = regression.ridgeTest(abX, abY)
# print(ridgeWeights)
# 
# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(ridgeWeights)
# plt.show()



# Run regression.stageWise on the abalone data with arguments 0.01 and 200
# (presumably step size and iteration count -- confirm in regression.py).
# The return value is discarded.
xArr, yArr = regression.loadDataSet('../data/abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
예제 #19
0
# -*- coding:utf-8 -*-
import regression
from numpy import *

# Load the abalone data and run regression.stageWise on it.
xArr,yArr = regression.loadDataSet("abalone.txt")
#stageWeight = regression.stageWise(xArr, yArr, 0.01, 200)
#print (stageWeight)

# Same call with last arguments 0.0001 and 50000 (presumably a smaller step
# and more iterations -- confirm in regression.py).
stageWeight = regression.stageWise(xArr, yArr, 0.0001, 50000)
#print (stageWeight)
예제 #20
0
# coding: utf-8
# linear_regression/test_multiple.py
import regression
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

if __name__ == "__main__":
    # Load the housing samples: srcX holds the raw features, y the targets.
    srcX, y = regression.loadDataSet('data/houses.txt')

    # Build the features: normalize a copy of srcX, then prepend a column of
    # ones to it.
    m, n = srcX.shape
    X = regression.normalize(srcX.copy())
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 1
    maxLoop = 50
    epsilon = 1

    # NOTE(review): bgd is presumably batch gradient descent; it returns a
    # (result, elapsed time) pair, and result unpacks into three values.
    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    print('theta is:')
    print(theta)
    print('........')

    # Predict a price: scale the query values 1650 and 3 with the mean and
    # standard deviation of feature columns 0 and 1 respectively.
    normalizedSize = (1650 - srcX[:, 0].mean(0)) / srcX[:, 0].std(0)
    normalizedBr = (3 - srcX[:, 1].mean(0)) / srcX[:, 1].std(0)
예제 #21
0
파일: lwlrTest.py 프로젝트: anty-zhang/dm
# -*- coding=utf-8 -*-
import regression
from numpy import *

# Load the samples and evaluate regression.lwlrTest on every one of them.
xArr,yArr = regression.loadDataSet("ex0.txt")

#l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0)
#l1 = regression.lwlr(xArr[0],xArr,yArr,0.001)
#print ("l0 is %s" % l0)
#print ("l1 is %s" % l1)



yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0)
print ("yHat is %s"  % yHat)

xMat = mat(xArr)
# axis=0 sorts down each column; axis=1 sorts along each row
# argsort returns the row indices of xMat in sorted order
srtInd = xMat[:,1].argsort(0)
#print("srtInd is %s"  % srtInd)




# Indexing with the index column adds an axis; [:,0,:] drops it again, so
# xSort holds the rows of xMat ordered by column 1.
xSort = xMat[srtInd][:,0,:]    # (original author: "what does this mean?")
#print ("xSort is %s"% xSort)
#print ("xSort is %s"% xSort)



import matplotlib.pyplot as plt
# coding: utf-8
# linear_regression/test_sgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    # Load the samples and prepend a column of ones to the feature matrix.
    X, y = regression.loadDataSet('data/ex1.txt');

    m,n = X.shape
    X = np.concatenate((np.ones((m,1)), X), axis=1)

    rate = 0.01
    maxLoop = 100
    epsilon =0.01

    # NOTE(review): sgd is presumably stochastic gradient descent; it returns
    # a (result, elapsed time) pair.
    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)

    theta, errors, thetas = result

    # Draw the fitted curve
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:,0].flatten().A[0])

    # Copy sorted in place along axis 0 (each column independently).
    xCopy = X.copy()
    xCopy.sort(0)
예제 #23
0
def main():
    """Run ``stageWise`` on 'abalone.txt' with arguments 0.01 and 200 and print the result."""
    xArr, yArr = reg.loadDataSet('abalone.txt')
    rntMat = stageWise(xArr, yArr, 0.01, 200)
    # Single-argument print: same output on Python 2, valid on Python 3.
    print(rntMat)
예제 #24
0
# coding: utf-8
# linear_regression/test_lwr.py
import regression
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    srcX, y = regression.loadDataSet('data/lwr.txt')

    # Build polynomial features: column 0 and its square, side by side.
    m, n = srcX.shape
    srcX = np.concatenate((srcX[:, 0], np.power(srcX[:, 0], 2)), axis=1)
    # Feature scaling
    X = regression.standardize(srcX.copy())
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.1
    maxLoop = 1000
    epsilon = 0.01

    # Query point (8, 8**2), passed through the same standardize helper.
    # NOTE(review): if standardize uses the statistics of its own argument,
    # a single-row input will not be scaled like the training data -- confirm.
    predicateX = regression.standardize(np.matrix([[8, 64]]))

    predicateX = np.concatenate((np.ones((1, 1)), predicateX), axis=1)

    # Two regression.lwr runs that differ only in the last argument (1 vs 0.1).
    result, t = regression.lwr(rate, maxLoop, epsilon, X, y, predicateX, 1)
    theta, errors, thetas = result

    result2, t = regression.lwr(rate, maxLoop, epsilon, X, y, predicateX, 0.1)
    theta2, errors2, thetas2 = result2

    # Plot the feature points
예제 #25
0
# coding: utf-8
# linear_regression/test_temperature_polynomial.py

import regression
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    srcX, y = regression.loadDataSet('temperature.txt')

    # Build polynomial features: column 0 and its square, side by side.
    m, n = srcX.shape
    srcX = np.concatenate((srcX[:, 0], np.power(srcX[:, 0], 2)), axis=1)
    # Feature scaling
    X = regression.standardize(srcX.copy())
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.1
    maxLoop = 1000
    epsilon = 0.01

    # NOTE(review): bgd is presumably batch gradient descent; it returns a
    # (result, elapsed time) pair.
    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the feature points
    fittingFig = plt.figure()
    title = 'polynomial with bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (
        rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    # Note: column 1 of srcX is the squared feature built above.
    trainingSet = ax.scatter(srcX[:, 1].flatten().A[0], y[:, 0].flatten().A[0])
예제 #26
0
# encoding=utf-8
import regression
from numpy import *

# NOTE(review): "filename" looks like a placeholder path -- confirm.
xArr, yArr = regression.loadDataSet("filename")
ws = regression.standRegres(xArr, yArr)

# The triple-quoted block below is a bare string literal, i.e. disabled code.
'''
# 绘图
xMat = mat(xArr)
yMat = mat(yArr)
yHat = xMat * ws
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()
#相关系数:用来衡量预测值和真实值的匹配程序
corrcoef(yHat.T, yMat)
'''
# Estimate every point in the data set
yHat = regression.lwlrTest(xArr, xArr, yArr, 0.003)
# # copy, then sort
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * ws
# # draw the line with plot
# ax.plot(xCopy[:,1], yHat)
# # plt.show()
#
# # correlation coefficient
# #print('corrcoef:');print(corrcoef(yHat.T,yMat))   # how well predictions match the true values
# #corrcoef:
# # [[ 1.          0.13653777]
# # [ 0.13653777  1.        ]]
# #end:Liner Regression

xArr,yArr = regression.loadDataSet('ex0.txt')
# Estimate a single point and print the prediction
print('xArr[0]:');print(xArr[0])
#  xArr[0]:
# [1.0, 0.067732]
print(yArr[0])  #output: 3.176513
print(regression.lwlr(xArr[0],xArr,yArr,1.0))  #output:  martix([[ 3.12204471]])
print(regression.lwlr(xArr[0],xArr,yArr,0.001))     #output:  martix([[ 3.20175729]])

# To estimate every point in the data set, call lwlrTest():
yHat = regression.lwlrTest(xArr,xArr,yArr,0.003)
print('all points about yHat:');    print(yHat)

# Inspect the fit
xMat = mat(xArr)   # matrix form of the loaded samples
srtInd = xMat[:,1].argsort(0) # row indices that sort the samples by column 1
예제 #28
0
import regression
from numpy import *

# Run regression.stageWise twice on the abalone data with different settings;
# the return values are discarded.
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)

regression.stageWise(xArr, yArr, 0.001, 5000)

# For comparison, run standRegres on regularized features and mean-centred
# targets, and print the weights as a row.
xMat = mat(xArr)
yMat = mat(yArr).T
xMat = regression.regularize(xMat)
yM = mean(yMat, 0)
yMat = yMat - yM
weights = regression.standRegres(xMat, yMat.T)
print(weights.T)
예제 #29
0
from regression import loadDataSet
from Multivariate import LinearRegresion
from ROOT import *
from array import array
from math import *


# Load the samples and split each feature row into its two components.
xs, ys = loadDataSet('ex0.txt')
x0, x1 = zip(*xs)

ndata = len(ys)
nregr = 100
dregr = 1.0/nregr

# Two LinearRegresion models: one built from the data alone, one with an
# extra kernel callable and the value 0.0001.
# NOTE(review): the kernel raises (x-y) to the power (x-y); a Gaussian kernel
# would use (x-y)**2 -- confirm whether this is intended.
lr   = LinearRegresion(xs,ys)
lwlr = LinearRegresion(xs,ys,lambda x,y: exp(-(x-y)**(x-y)/(2*0.01**2)), 0.0001 )

# Evaluate both models at nregr query points (1.0, i*dregr).
# NOTE: the zip(...)[1] indexing below requires Python 2, where zip/map
# return lists.
xlr = [ (1.0,dregr*i) for i in range(nregr) ]
ylr = map( lr.GetValue, xlr )

xlwlr = xlr
ylwlr = map( lwlr.GetValue, xlwlr )

# ROOT graphs: the raw data and the two curves.
gdata = TGraph( ndata, array('f',x1)   , array('f',ys) )
glr   = TGraph( nregr, array('f',zip(*xlr)[1])  , array('f',ylr) )
glwlr = TGraph( nregr, array('f',zip(*xlwlr)[1]), array('f',ylwlr) )

gdata.SetMarkerStyle(20)
glr  .SetLineWidth(2)
glwlr.SetLineWidth(2)
glr  .SetLineColor(kRed)
예제 #30
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 12 06:19:52 2018

@author: kukuLife
"""
import numpy as np
import regression

# Load the samples and prepend a column of ones to the feature matrix.
X, y = regression.loadDataSet('ex1.txt')
m, n = X.shape
X = np.concatenate((np.ones((m, 1)), X), axis=1)

maxLoop = 1500
epsilon = 0.01
rate = 0.02

# NOTE(review): sgd is presumably stochastic gradient descent; the argument
# order is specific to this project's regression module -- confirm.
theta, thetas, errors = regression.sgd(maxLoop, rate, X, y, epsilon)
예제 #31
0
__author__ = 'sunbeansoft'

import regression as reg
import matplotlib.pyplot as plt
from numpy import *

# Load the samples, print reg.lwlr for the first sample with two kernel
# settings, then evaluate reg.lwlrTest on every sample.
xArr, yArr = reg.loadDataSet('ex0.txt')
# wx = reg.standRegres(xArr, yArr)
# print wx
#
# xMat = mat(xArr)
# yMat = mat(yArr)
#
# yHat = xMat * wx
# corrcoef(yHat.H, yMat)
#
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# xCopy = xMat.copy()
# xCopy.sort(0)
# yHat = xCopy * wx
# ax.plot(xCopy[:, 1], yHat)
# plt.show()

# Single-argument print calls: same output as the Python 2 print statements
# they replace, and valid on Python 3.
print(reg.lwlr(xArr[0], xArr, yArr, 1.0))
print(reg.lwlr(xArr[0], xArr, yArr, 0.001))

yHat = reg.lwlrTest(xArr, xArr, yArr, 0.003)
xMat = mat(xArr)
# Row indices that sort the samples by feature column 1.
srtInd = xMat[:, 1].argsort(0)
예제 #32
0
import numpy as np
import matplotlib as cm
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import regression as re

if __name__ == '__main__':
    X, y = re.loadDataSet("data/ex1.txt")  # week-2 exercise data from Coursera's "Machine Learning" course
    # Prepend a column of ones to the feature matrix.
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)
    # In this project standRegres returns a (theta, elapsed time) pair.
    theta, timeConsumed = re.standRegres(X, y)
    print('消耗[%s] s \n 参数矩阵:\n %s' % (timeConsumed, theta))

    # Scatter the training samples and overlay the fitted line.
    fittingFig = plt.figure()
    title = 'StandRegress  time: %s' % timeConsumed
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])
    # Sort a copy along axis 0 so the line is drawn left to right.
    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')
    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')
    plt.legend([trainingSet, fittingLine],
               ['Training Set', 'Linear Regression'])
    plt.show()
예제 #33
0
def main():
    """Run ``ridgeTest`` on 'abalone.txt' and pass the result to ``plotFigure``."""
    features, targets = reg.loadDataSet('abalone.txt')
    plotFigure(ridgeTest(features, targets))
예제 #34
0
import regression
import matplotlib.pyplot as plt
from numpy import *
# Load the samples and evaluate regression.lwlrTest on every one of them for
# three values of the last argument (1, 0.01, 0.003).
xArr,yArr=regression.loadDataSet('ex0.txt')

#regression.lwlr(xArr[0],xArr,yArr,1.0)
#a = regression.lwlr(xArr[0],xArr,yArr,0.001)
yHat1 = regression.lwlrTest(xArr, xArr, yArr,1)
yHat2 = regression.lwlrTest(xArr, xArr, yArr,0.01)
yHat3 = regression.lwlrTest(xArr, xArr, yArr,0.003)
#print("yHat1 : %s" % (yHat1))

# Row indices that sort the samples by feature column 1.
xMat=mat(xArr)
srtInd = xMat[:,1].argsort(0)
#print("srtInd : %s" % (srtInd))

# [:,0,:] removes the extra axis introduced by indexing with srtInd.
xSort=xMat[srtInd][:,0,:]
#print("xSort : %s" % (xSort))

# One subplot per setting: the curve over the raw samples (red dots).
fig = plt.figure()
ax1 = fig.add_subplot(311)
ax1.plot(xSort[:,1],yHat1[srtInd])
ax1.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')

ax2 = fig.add_subplot(312)
ax2.plot(xSort[:,1],yHat2[srtInd])
ax2.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')

ax3 = fig.add_subplot(313)
ax3.plot(xSort[:,1],yHat3[srtInd])
ax3.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0] , s=2, c='red')
예제 #35
0
# This Python file uses the following encoding: utf-8
import os, sys
import regression
reload(regression)
from numpy import *
# Raw string: yields exactly the same path as before, without depending on
# unrecognised backslash escapes (a "\U" in a normal string literal is a
# SyntaxError on Python 3).
xArr,yArr=regression.loadDataSet(r'C:\Users\YAN\Desktop\regression/ex0.txt')
'''
#---------标准回归----------#
print (xArr[0:2])
ws=regression.standRegres(xArr,yArr)
print ws
xMat=mat(xArr)
yMat=mat(yArr)
yHat=xMat*ws
import matplotlib.pyplot as pl
fig=pl.figure()
ax=fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
xCopy=xMat.copy(0)# sort along the first axis
yHat=xCopy*ws
ax.plot(xCopy[:,1],yHat)
pl.show()
'''
#print(corrcoef(yHat.T,yMat))
'''
print yArr[0]
print(regression.lwlr(xArr[0],xArr,yArr,1.0))
print(regression.lwlr(xArr[0],xArr,yArr,0.001))
'''
'''
#----------局部回归--------#
예제 #36
0
import regression
from numpy import *

# Load the abalone data and print the regression.stageWise result.
xArray, yArray = regression.loadDataSet("./regression/abalone.txt")

# yHat01 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 0.1)
# yHat1 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 1)
# yHat10 = regression.lwlrTest(xArray[100:199], xArray[0:99], yArray[0:99], 10)

# print regression.rssError(yArray[100:199], yHat01)
# print regression.rssError(yArray[100:199], yHat1)
# print regression.rssError(yArray[100:199], yHat10)

# wsStand = regression.standRegres(xArray[0:99], yArray[0:99])
# yHat = mat(xArray[100:199]) * wsStand
# print regression.rssError(yArray[100:199], yHat.T.A)

# ridgeWeights = regression.ridgeTest(xArray, yArray)

# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(ridgeWeights)
# plt.show()
# Single-argument print: same output on Python 2, valid on Python 3.
print(regression.stageWise(xArray, yArray, 0.01, 200))
예제 #37
0
# coding: utf-8
# linear_regression/test_temperature_normal.py
import regression as re
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == '__main__':
    # Load the samples and prepend a column of ones to the feature matrix.
    X, y = re.loadDataSet('data/temperature.txt');

    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.0001
    maxLoop = 1000
    epsilon = 0.01

    # NOTE(review): bgd is presumably batch gradient descent; it returns a
    # (result, elapsed time) pair.
    result, timeConsumed = re.bgd(rate, maxLoop, epsilon, X, y)

    theta, errors, thetas = result

    # Draw the fitted curve
    fittingFig = plt.figure()
    title = 'bgd: rate=%.3f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    # Copy sorted in place along axis 0 (each column independently).
    xCopy = X.copy()
    xCopy.sort(0)
예제 #38
0
# -*- coding: utf-8 -*-
"""
Created on Fri May 12 16:07:29 2017

@author: 凯风
"""

import regression
from numpy import *
from imp import reload
import matplotlib.pyplot as plt

# Re-import the project module so that edits to regression.py take effect.
reload(regression)
xArr, yArr = regression.loadDataSet('ex0.txt')
# Bare expression: only displays a value in an interactive session and has
# no effect when the file runs as a script.
xArr[0:2]
ws = regression.standRegres(xArr, yArr)  # compute the regression coefficients
# Bare expression, see above.
ws

xMat = mat(xArr)
yMat = mat(yArr)
yHat = xMat * ws  # fitted values

# Draw the fitted line and the scatter plot
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
# Sort a copy along axis 0 so the line is drawn left to right.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
ax.plot(xCopy[:, 1], yHat)
plt.show()
예제 #39
0
# xArr, yArr = regression.loadDataSet('ex0.txt')
# yHat = regression.lwlrTest(xArr,xArr,yArr,0.01)
# print yHat
# xMat = mat(xArr)
# srtInd = xMat[:,1].argsort(0)
# xSort = xMat[srtInd][:,0,:]
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(xSort[:,1], yHat[srtInd])
# ax.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
# plt.show()

#################### abalone age #############################

# Load the abalone data set; every experiment below is commented out.
abX, abY = regression.loadDataSet('abalone.txt')
# yHat01 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],0.1)
# yHat02 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],1)
# yHat03 = regression.lwlrTest(abX[0:99],abX[0:99],abY[0:99],10)

# print regression.rssError(abY[0:99], yHat01.T)
# print regression.rssError(abY[0:99], yHat02.T)
# print regression.rssError(abY[0:99], yHat03.T)

#print regression.ridgeRegres(abX, abY, 1)


# ridgeWeights = regression.ridgeTest(abX, abY)
# print ridgeWeights

# fig = plt.figure()
예제 #40
0
# -*- coding=utf-8 -*-
import regression
from numpy import *

# Load the samples and evaluate regression.lwlrTest on every one of them.
xArr, yArr = regression.loadDataSet("ex0.txt")

#l0 = regression.lwlr(xArr[0], xArr, yArr, 1.0)
#l1 = regression.lwlr(xArr[0],xArr,yArr,0.001)
#print ("l0 is %s" % l0)
#print ("l1 is %s" % l1)

yHat = regression.lwlrTest(xArr, xArr, yArr, 1.0)
print("yHat is %s" % yHat)

xMat = mat(xArr)
# axis=0 sorts down each column; axis=1 sorts along each row
# argsort returns the row indices of xMat in sorted order
srtInd = xMat[:, 1].argsort(0)
#print("srtInd is %s"  % srtInd)

# Indexing with the index column adds an axis; [:, 0, :] drops it again, so
# xSort holds the rows of xMat ordered by column 1.
xSort = xMat[srtInd][:, 0, :]  # (original author: "what does this mean?")
#print ("xSort is %s"% xSort)

import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xSort[:, 1], mat(yHat[srtInd]))  # (original author: "is something wrong here?")
# Raw samples as small blue dots.
ax.scatter(mat(xArr)[:, 1].flatten().A[0],
           mat(yArr).T.flatten().A[0],
           s=2,
           c='blue')
예제 #41
0
파일: ridgeTest.py 프로젝트: anty-zhang/dm
# -*- coding:utf-8 -*-
import regression
from numpy import *

# Load the abalone data and run regression.ridgeTest on it.
abX,abY = regression.loadDataSet("abalone.txt")
ridgeWeight = regression.ridgeTest(abX, abY)
#print ("ridgeWeight is %s"  % ridgeWeight)


# Show how the regression coefficients relate to log(lam)
# (original author's notes, not verified here):
# when lam is very small the result matches plain linear regression;
# when lam is very large all coefficients shrink to 0.
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeight)
plt.show()




# 预测鲍鱼的年龄

import regression


def rssError(yArr, yHatArr):
    """Return the sum of squared differences between ``yArr`` and ``yHatArr``.

    Both arguments must support element-wise subtraction and the result
    must provide ``.sum()`` (for example NumPy arrays).
    """
    residuals = yArr - yHatArr
    squared = residuals ** 2
    return squared.sum()


if __name__ == '__main__':
    xArr, yArr = regression.loadDataSet('../data/abalone.txt')
    # yHat01 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 1)
    # yHat10 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 10)
    # # To measure the size of the prediction error, use rssError()
    # print(rssError(yArr[0:99], yHat01.T))
    # print(rssError(yArr[0:99], yHat1.T))
    # print(rssError(yArr[0:99], yHat10.T))
    # print('在新数据上的误差:')
    # yHat01 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 1)
    # yHat10 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 10)
    # print(rssError(yArr[100:199], yHat01.T))
    # print(rssError(yArr[100:199], yHat1.T))
    # print(rssError(yArr[100:199], yHat10.T))

    # Use ridge regression instead
    ridgeWeights = regression.ridgeTest(xArr, yArr)
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
예제 #43
0
파일: test.py 프로젝트: sundyCoder/CSK
import regression

#xArr,yArr=regression.loadDataSet('ex0.txt')
#regValue = regression.lwlr(xArr[0],xArr,yArr,1.0)
#print(regValue)
#regression.lwlr(xArr[0],xArr,yArr,0.001)

# Run regression.stageWise on the abalone data with arguments 0.01 and 200;
# the return value is discarded.
xArr,yArr=regression.loadDataSet('abalone.txt')
regression.stageWise(xArr,yArr,0.01,200)
예제 #44
0
    denom = xTx + np.eye(np.shape(xMat)[1]) * lam
    if np.linalg.det(denom) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = denom.I * (xMat.T * yMat)
    return ws


def ridgeTest(xArr, yArr):
    """Run ``ridgeRegress`` for 30 exponentially spaced lambda values.

    The targets are mean-centred; the features are mean-centred and divided
    by their per-column variance. Lambda takes the values ``exp(i - 10)``
    for ``i = 0..29``.

    Args:
        xArr: feature rows, convertible with ``np.mat``.
        yArr: target values, convertible with ``np.mat``.

    Returns:
        A ``(30, n_features)`` array whose row ``i`` holds the weights for
        ``lam = exp(i - 10)``.
    """
    features = np.mat(xArr)
    targets = np.mat(yArr).T
    targets = targets - np.mean(targets, 0)

    # Scale each column by its variance (not its standard deviation).
    col_means = np.mean(features, 0)
    col_vars = np.var(features, 0)
    features = (features - col_means) / col_vars

    n_lambdas = 30
    n_features = np.shape(features)[1]
    wMat = np.zeros((n_lambdas, n_features))
    for row, exponent in enumerate(range(-10, n_lambdas - 10)):
        weights = ridgeRegress(features, targets, np.exp(exponent))
        wMat[row, :] = weights.T
    return wMat


# Load the abalone data, run ridgeTest on it and plot every column of the
# returned weight matrix against its row index.
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = ridgeTest(abX, abY)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()
예제 #45
0
파일: run.py 프로젝트: evertongago/forecast
import regression
import matplotlib.pyplot as plt
from numpy import *


# Run regression.standRegres on the CSV data, print the correlation between
# predictions and targets, then plot the fit over the samples.
xArr,yArr=regression.loadDataSet('/home/everton.gago/projetos/braskem/data/data.csv')
ws = regression.standRegres(xArr,yArr)
xMat=mat(xArr)
yMat=mat(yArr)
yHat = xMat*ws

corr = corrcoef(yHat.T, yMat)

# One pre-formatted argument: emits the same text as the former Python 2
# print statement (including its two spaces) and is valid on Python 3.
print('Corr Coef:  %s' % (corr,))

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat.T[:,0].flatten().A[0])
# Sort a copy along axis 0 so the line is drawn left to right.
xCopy=xMat.copy()
xCopy.sort(0)
yHat=xCopy*ws
ax.plot(xCopy[:,1],yHat)
plt.show()