def stageWise(xArr,yArr,eps=0.01,numIt=100):
    """Fit linear-regression weights by greedy forward stagewise search.

    Starting from all-zero weights, every iteration tries moving each single
    weight by ``+eps`` and by ``-eps``, keeps the first move that yields the
    lowest ``rssError`` value, and repeats this ``numIt`` times. The current
    weights are printed at the start of every iteration.

    Args:
        xArr: Feature data, converted with ``mat``.
        yArr: Target values, converted with ``mat`` and transposed.
        eps: Step size applied to a single weight per iteration.
        numIt: Number of iterations to run.

    Returns:
        The final weights as an ``(n, 1)`` array, where ``n`` is the number of
        columns of the regularized feature matrix. Previously nothing was
        returned, so callers could only read the printed output.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    # Center the target by subtracting its mean taken along axis 0.
    yMat = yMat - mean(yMat, 0)
    # Project helper; presumably standardizes the feature columns -- TODO confirm.
    xMat = regression.regularize(xMat)
    n = shape(xMat)[1]
    ws = zeros((n, 1))
    wsMax = ws.copy()
    for i in range(numIt):
        print(ws.T)
        lowestError = inf
        for j in range(n):
            # Try both increasing and decreasing weight j by one step.
            for sign in [-1, 1]:
                wsTest = ws.copy()
                wsTest[j] += eps*sign
                yTest = xMat*wsTest
                rssE = rssError(yMat.A, yTest.A)
                # Remember the candidate with the lowest error seen so far.
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
    return ws

# xArr,yArr = regression.loadDataSet('abalone.txt')
# # print('逐步向前回归结果:');print(regression.stageWise(xArr,yArr,0.001,5000))

# xMat = mat(xArr)
# yMat = mat(yArr).T
# xMat = regression.regularize(xMat)
# yM = mean(yMat,0)
# yMat = yMat - yM
# weights = regression.standRegres(xMat,yMat.T)
# print('最小二乘法:'); print(weights.T)
# #OUTPUT:
# #[[ 0.0430442  -0.02274163  0.13214087  0.02075182  2.22403814 -0.99895312
# #  -0.11725427  0.16622915]]
def stageWise(xArr,yArr,eps=0.01,numIt=100):
    """Greedy forward stagewise search for linear-regression weights.

    The target is centered on its axis-0 mean and the features are passed
    through ``regularize`` (defined elsewhere; presumably standardizes the
    columns -- TODO confirm). Beginning with all-zero weights, each of the
    ``numIt`` rounds evaluates a ``-eps`` and a ``+eps`` nudge of every
    weight and adopts the first candidate with the smallest ``rssError``.

    Args:
        xArr: Feature data, converted with ``mat``.
        yArr: Target values, converted with ``mat`` and transposed.
        eps: Size of the nudge applied to one weight per round.
        numIt: Number of rounds.

    Returns:
        The final ``(n, 1)`` weight array.
    """
    features = mat(xArr)
    targets = mat(yArr).T
    targets = targets - mean(targets, 0)
    features = regularize(features)
    n_features = shape(features)[1]

    ws = zeros((n_features, 1))
    best = ws.copy()
    for _ in range(numIt):
        smallest = inf
        for col in range(n_features):
            # Negative step first, then positive, matching eps*-1 and eps*1.
            for step in (eps * -1, eps * 1):
                candidate = ws.copy()
                candidate[col] += step
                predicted = features * candidate
                error = rssError(targets.A, predicted.A)
                if error < smallest:
                    smallest, best = error, candidate
        ws = best.copy()
    return ws
Esempio n. 3
0
# For every task size, build a query row and predict with locally weighted
# linear regression using the training data of each task.
predictList = []
for tTSize in predictTaskSizeList:
    # Factor tTSize as taskX * taskY, with taskX the largest divisor that does
    # not exceed sqrt(tTSize).
    taskX = int(sqrt(tTSize))
    while(tTSize % taskX != 0):
        taskX -= 1
    taskY = tTSize/taskX
    xCheckMat = mat([myUtils.getConstantValue(), tTSize, taskX, taskY, predictHours], dtype = float)

    preDictPerTask = []
    for taskIdx in range(1,2):
        xMat, yMat = myUtils.getXandYMatfromList(trainDataList[taskIdx], 2)
        xMat = mat(xMat, dtype = float)
        yMat = mat(yMat, dtype = float)

        # Regularize every column except the first, then apply the same
        # means/std to the query row so both are on the same scale.
        xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
        xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans)/xStd

        preDictPerComponent = []
        for yIdx in range(0, 2): # calculate computing result and then communication result
            yMatTmp = yMat[:, yIdx]

            taskPredict = 0
            for kElement in bestKList:
                taskPredict = 0
                # get the result for the query row
                wr = regression.lwlr(xCheckMat, xMat,yMatTmp.T,kElement)
                #wr = regression.standRegres(xMat,yMatTmp.T)
                # Identity test: `!=` on a NumPy matrix compares elementwise
                # and has an ambiguous truth value for more than one element.
                if wr is not None:
                    #print wr
                    taskPredict = xCheckMat * wr
Esempio n. 4
0
# Load comma-separated rows: two feature columns followed by the target.
xMat = []
yMat = []
# The context manager closes the file even if parsing a row fails.
with open("/home/yu/workspace/machine-learning-class/ex1/ex1data2.txt") as fr:
    for line in fr:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        tmpEle = line.split(',')
        # The leading 1 is a constant column.
        xMat.append([1, tmpEle[0], tmpEle[1]])
        yMat.append([tmpEle[2]])

bestKList = [
    10
]  #[0.07, 0.3, 0.1, 0.7, 3, 10, 28, 40, 60, 80, 100]#[100, 80, 60, 40, 28, 10, 3, 0.7, 0.3, 0.07, 0.01]

# Query row to predict for, laid out like a row of xMat.
xCheckMat = mat([1, 1650, 3], dtype=float)
xMat = mat(xMat, dtype=float)
yMat = mat(yMat, dtype=float)

# Regularize every column except the first, then apply the same means/std to
# the query row so both are on the same scale.
xMat[:, 1:], xMeans, xStd = regression.regularize(xMat[:, 1:])
xCheckMat[:, 1:] = (xCheckMat[:, 1:] - xMeans) / xStd
for kElement in bestKList:
    taskPredict = 0
    # get the result for the query row
    wr = regression.lwlr(xCheckMat, xMat, yMat.T, kElement)
    #wr = regression.standRegres(xMat,yMat.T)
    # Identity test: `!=` on a NumPy matrix compares elementwise and has an
    # ambiguous truth value for more than one element.
    if wr is not None:
        print(wr)
        taskPredict = xCheckMat * wr
        print(taskPredict)
        # Stop at the first k that produced a result.
        break
import regression
from numpy import *

# Load the data set from 'abalone.txt' and run stageWise twice with different
# third/fourth arguments (presumably step size and iteration count -- TODO
# confirm against regression.stageWise). The return values are discarded.
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)

regression.stageWise(xArr, yArr, 0.001, 5000)

# Repeat the preprocessing by hand: regularize the features and subtract the
# axis-0 mean from the target, then fit with standRegres and print the weights.
xMat = mat(xArr)
yMat = mat(yArr).T
xMat = regression.regularize(xMat)
yM = mean(yMat, 0)
yMat = yMat - yM
weights = regression.standRegres(xMat, yMat.T)
print(weights.T)
# NOTE(review): abX, abY and yHat are not defined in the visible part of this
# script; they must be set up earlier for this section to run.
# print() call form works on both Python 2 and 3 and matches the call above.
print(regression.rssError(abY[100:199], yHat.T.A))

ridgeWeights = regression.ridgeTest(abX, abY)
#print ridgeWeights
import matplotlib.pyplot as plt
# Plot the ridgeTest output on a single subplot and display it.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Reload the data set from 'abalone.txt'.
xArr, yArr = regression.loadDataSet('abalone.txt')
#regression.stageWise(xArr, yArr, 0.01, 200)
#regression.stageWise(xArr, yArr, 0.001, 5000)

# Regularize the features and subtract the axis-0 mean from the target, then
# fit with standRegres and print the weights.
xMat, yMat = mat(xArr), mat(yArr).T
xMat = regression.regularize(xMat)
yM = mean(yMat, 0)
yMat = yMat - yM
weights = regression.standRegres(xMat, yMat.T)
# print() call form works on both Python 2 and 3.
print(weights.T)

# Run stageWise on the raw data and plot whatever it returns.
weights = regression.stageWise(xArr, yArr, 0.005, 1000)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(weights)
plt.show()

'''
lgX, lgY = [], []
regression.setDataCollect(lgX, lgY)
print shape(lgX)