def test_lwlr(self):
    """Exercise lwlr.lwlr and lwlr.lwlrTest on "ex0.txt" and print the results.

    Nothing is asserted; both results are only printed for manual inspection.
    """
    samples, targets = lr.loadDataSet("ex0.txt")
    first_sample = samples[0]

    # Predict for the sample stored in the first row of the data set.
    single_result = lwlr.lwlr(first_sample, samples, targets, 1)
    print("\n w == %s" % (single_result))

    # Run the batch variant with the data set serving as both the test
    # points and the training data.
    batch_result = lwlr.lwlrTest(samples, samples, targets, 1)
    print("\n yHat == %s" % (batch_result))
def test_corrcoef(self):
    """Fit lr.standRegres on "ex0.txt" and print the weights and corrcoef output.

    Nothing is asserted; the values are only printed for manual inspection.
    """
    features, targets = lr.loadDataSet("ex0.txt")
    weights = lr.standRegres(features, targets)
    print("\n w == %s" % (weights))

    # Predicted values: the feature matrix multiplied by the fitted weights.
    predicted = mat(features) * weights
    observed = mat(targets)

    # Correlation coefficient between the predicted and the true values.
    correlation = corrcoef(predicted.T, observed)
    print("\n corrcoef == %s" % (correlation))
def test_abalone(self):
    """Print lwlr errors on "abalone.txt" for k = 0.1, 1 and 10.

    Rows 0:99 are used as training data. Errors (abalone.rssError) are printed
    first for predictions on the training rows themselves, then for
    predictions on rows 100:199. Nothing is asserted.
    """
    features, targets = lr.loadDataSet("abalone.txt")
    fit_x = features[0:99]
    fit_y = targets[0:99]
    kernel_sizes = (0.1, 1, 10)

    # Compute all three predictions on the training rows before reporting.
    fitted = [lwlr.lwlrTest(fit_x, fit_x, fit_y, k) for k in kernel_sizes]

    # Error for each k. Values recorded by the original author for this run:
    # error01 == 56.7987246777, error1 == 429.89056187,
    # error10 == 549.118170883
    train_templates = ("\n error01 == %s", "\n error1 == %s", "\n error10 == %s")
    for template, prediction in zip(train_templates, fitted):
        print(template % (abalone.rssError(fit_y, prediction.T)))

    # Author's note: the smallest kernel, k=0.1, gives the lowest error
    # above, but that does not hold for new data.
    unseen_x = features[100:199]
    unseen_y = targets[100:199]

    # Same three k values, now predicting rows the model was not fitted on
    # (author's note: k=10 was found to give the smallest error here).
    unseen = [lwlr.lwlrTest(unseen_x, fit_x, fit_y, k) for k in kernel_sizes]

    # NOTE(review): the original comments repeated the training-run values
    # next to these prints; they look copy-pasted and contradict the note
    # above, so no expected values are recorded here.
    unseen_templates = ("\n\n\n error01 == %s", "\n error1 == %s", "\n error10 == %s")
    for template, prediction in zip(unseen_templates, unseen):
        print(template % (abalone.rssError(unseen_y, prediction.T)))
def test_lr(self):
    """Fit lr.standRegres on "ex0.txt" and plot the data with the fitted line.

    Prints the fitted weights and the sorted column-1 values, draws a scatter
    of feature column 1 against the targets, overlays the fitted line in red,
    and shows the figure. Nothing is asserted.
    """
    features, targets = lr.loadDataSet("ex0.txt")
    weights = lr.standRegres(features, targets)
    print("\n w == %s" % (weights))

    x_mat = mat(features)
    y_mat = mat(targets)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    # Flatten both matrices to 1-D arrays, which is what scatter receives.
    axes.scatter(x_mat[:, 1].flatten().A[0], y_mat.T[:, 0].flatten().A[0])

    # Sort a copy along axis 0 so the x values handed to plot are ascending;
    # predictions are computed from the sorted copy only (the original also
    # computed an unused prediction on the unsorted matrix).
    x_sorted = x_mat.copy()
    x_sorted.sort(0)
    line_y = x_sorted * weights
    line_x = x_sorted[:, 1]
    print("\n xCopy[:, 1] == %s" % (line_x))

    # Draw the fitted line.
    axes.plot(line_x.A, line_y.A, "r")
    plt.show()
def test_lwlr_plot(self):
    """Call lwlr.lwlrPlot on the "ex0.txt" data with k = 0.01."""
    samples, targets = lr.loadDataSet("ex0.txt")

    # Author's note: k = 1 is basically the same as plain linear regression;
    # k = 0.01 gives noticeably better predictions.
    k = 0.01
    lwlr.lwlrPlot(samples, targets, k)