# Example 1
    def test_lwlr(self):
        """Smoke-test locally weighted linear regression (LWLR) on ex0.txt."""
        features, targets = lr.loadDataSet("ex0.txt")

        # Predict the response for the first sample only, kernel width k=1.
        single_prediction = lwlr.lwlr(features[0], features, targets, 1)
        print("\n w == %s" % single_prediction)

        # Predict every sample of the training set against itself, k=1.
        all_predictions = lwlr.lwlrTest(features, features, targets, 1)
        print("\n yHat == %s" % all_predictions)
# Example 2
    def test_corrcoef(self):
        """Fit standard linear regression and report how well predictions
        correlate with the true responses."""
        features, targets = lr.loadDataSet("ex0.txt")
        weights = lr.standRegres(features, targets)
        print("\n w == %s" % weights)

        x_matrix = mat(features)
        y_matrix = mat(targets)
        # Predicted responses from the fitted weights.
        predictions = x_matrix * weights
        # Correlation coefficient between predicted and actual values.
        correlation = corrcoef(predictions.T, y_matrix)
        print("\n corrcoef == %s" % correlation)
# Example 3
    def test_abalone(self):
        """Compare LWLR kernel widths (k) on training data vs. held-out data.

        On the training slice the smallest kernel (k=0.1) yields the lowest
        RSS error because it can hug the training points; on previously
        unseen samples the small kernel over-fits and the largest kernel
        (k=10) generalizes best — a classic over-fitting illustration.
        """
        data_set, label_set = lr.loadDataSet("abalone.txt")
        train_data_set = data_set[0:99]
        train_label_set = label_set[0:99]

        def _report_rss(query_data, query_labels, lead="\n"):
            # Evaluate LWLR (fitted against the training slice) at three
            # kernel widths and print the RSS error for each.  `lead` is the
            # newline prefix of the first line only — the held-out section
            # of the original output starts with "\n\n\n".
            prefix = lead
            for k in (0.1, 1, 10):
                y_hat = lwlr.lwlrTest(query_data, train_data_set,
                                      train_label_set, k)
                error = abalone.rssError(query_labels, y_hat.T)
                # 0.1 -> "01", 1 -> "1", 10 -> "10" (original label scheme).
                print("%s error%s == %s" % (prefix, str(k).replace(".", ""),
                                            error))
                prefix = "\n"

        # Errors against the training slice itself: k=0.1 wins here
        # (roughly 56.8 vs 429.9 vs 549.1).
        _report_rss(train_data_set, train_label_set)

        # Errors against held-out samples: now k=10 has the smallest error.
        new_data_set = data_set[100:199]
        new_label_set = label_set[100:199]
        _report_rss(new_data_set, new_label_set, lead="\n\n\n")
# Example 4
    def test_lr(self):
        """Fit standard linear regression on ex0.txt and plot the raw
        samples together with the fitted regression line."""
        features, targets = lr.loadDataSet("ex0.txt")
        weights = lr.standRegres(features, targets)
        print("\n w == %s" % weights)

        x_matrix = mat(features)
        y_matrix = mat(targets)
        # Predicted responses for every sample.
        # NOTE(review): this value is never used below — the plotted line is
        # recomputed from the sorted copy; kept to mirror the original flow.
        predictions = x_matrix * weights

        fig = plt.figure()
        axes = fig.add_subplot(111)
        # Scatter plot of the raw (x, y) samples.
        axes.scatter(x_matrix[:, 1].flatten().A[0],
                     y_matrix.T[:, 0].flatten().A[0])

        # Sort by x so the regression line draws cleanly left-to-right.
        sorted_x = x_matrix.copy()
        sorted_x.sort(0)
        line_y = sorted_x * weights
        print("\n xCopy[:, 1] == %s" % sorted_x[:, 1])
        # Draw the fitted line in red over the scatter.
        axes.plot(sorted_x[:, 1].A, line_y.A, "r")
        plt.show()
# Example 5
 def test_lwlr_plot(self):
     """Plot LWLR predictions for ex0.txt with kernel width k=0.01."""
     # k=1 behaves like plain linear regression; k=0.01 tracks the
     # data much more closely.
     features, targets = lr.loadDataSet("ex0.txt")
     lwlr.lwlrPlot(features, targets, 0.01)